diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 85e1cd003a..a369dad12b 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -23,18 +23,29 @@ static void tsearch_readline_callback(void *arg);
 
 #ifdef USE_WIDE_UPPER_LOWER
 
+/*
+ * The reason these functions use a 3-wchar_t output buffer, not 2 as you
+ * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be
+ * getting from char2wchar() is UTF16 not UTF32. A single input character
+ * may therefore produce a surrogate pair rather than just one wchar_t;
+ * we also need room for a trailing null. When we do get a surrogate pair,
+ * we pass just the first code to iswdigit() etc, so that these functions will
+ * always return false for characters outside the Basic Multilingual Plane.
+ */
+#define WC_BUF_LEN 3
+
 int
 t_isdigit(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
-	wchar_t		character[2];
+	wchar_t		character[WC_BUF_LEN];
 	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
 	if (clen == 1 || lc_ctype_is_c(collation))
 		return isdigit(TOUCHAR(ptr));
 
-	char2wchar(character, 2, ptr, clen, mylocale);
+	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
 
 	return iswdigit((wint_t) character[0]);
 }
@@ -43,14 +54,14 @@ int
 t_isspace(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
-	wchar_t		character[2];
+	wchar_t		character[WC_BUF_LEN];
 	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
 	if (clen == 1 || lc_ctype_is_c(collation))
 		return isspace(TOUCHAR(ptr));
 
-	char2wchar(character, 2, ptr, clen, mylocale);
+	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
 
 	return iswspace((wint_t) character[0]);
 }
@@ -59,14 +70,14 @@ int
 t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
-	wchar_t		character[2];
+	wchar_t		character[WC_BUF_LEN];
 	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
 	if (clen == 1 || lc_ctype_is_c(collation))
 		return isalpha(TOUCHAR(ptr));
 
-	char2wchar(character, 2, ptr, clen, mylocale);
+	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
 
 	return iswalpha((wint_t) character[0]);
 }
@@ -75,14 +86,14 @@ int
 t_isprint(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
-	wchar_t		character[2];
+	wchar_t		character[WC_BUF_LEN];
 	Oid			collation = DEFAULT_COLLATION_OID;	/* TODO */
 	pg_locale_t mylocale = 0;	/* TODO */
 
 	if (clen == 1 || lc_ctype_is_c(collation))
 		return isprint(TOUCHAR(ptr));
 
-	char2wchar(character, 2, ptr, clen, mylocale);
+	char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
 
 	return iswprint((wint_t) character[0]);
 }