diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a3dc3be5a8..2a98625964 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -22,8 +22,9 @@ * settable at run-time. However, we don't actually set those locale * categories permanently. This would have bizarre effects like no * longer accepting standard floating-point literals in some locales. - * Instead, we only set the locales briefly when needed, cache the - * required information obtained from localeconv(), and set them back. + * Instead, we only set these locale categories briefly when needed, + * cache the required information obtained from localeconv() or + * strftime(), and then set the locale categories back to "C". * The cached information is only used by the formatting functions * (to_char, etc.) and the money type. For the user, this should all be * transparent. @@ -42,7 +43,7 @@ * will change the memory save is pointing at. To do this sort of thing * safely, you *must* pstrdup what setlocale returns the first time. * - * FYI, The Open Group locale standard is defined here: + * The POSIX locale standard is available here: * * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html *---------- @@ -486,7 +487,6 @@ PGLC_localeconv(void) static bool CurrentLocaleConvAllocated = false; struct lconv *extlconv; struct lconv worklconv; - bool trouble = false; char *save_lc_monetary; char *save_lc_numeric; #ifdef WIN32 @@ -518,42 +518,38 @@ PGLC_localeconv(void) */ memset(&worklconv, 0, sizeof(worklconv)); - /* Save user's values of monetary and numeric locales */ + /* Save prevailing values of monetary and numeric locales */ save_lc_monetary = setlocale(LC_MONETARY, NULL); - if (save_lc_monetary) - save_lc_monetary = pstrdup(save_lc_monetary); + if (!save_lc_monetary) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_monetary = pstrdup(save_lc_monetary); save_lc_numeric = setlocale(LC_NUMERIC, NULL); - if (save_lc_numeric) - save_lc_numeric = pstrdup(save_lc_numeric); + if (!save_lc_numeric) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_numeric = pstrdup(save_lc_numeric); #ifdef WIN32 /* - * Ideally, monetary and numeric local symbols could be returned in any - * server encoding. Unfortunately, the WIN32 API does not allow - * setlocale() to return values in a codepage/CTYPE that uses more than - * two bytes per character, such as UTF-8: + * The POSIX standard explicitly says that it is undefined what happens if + * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from + * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to + * believe that localeconv() should return strings that are encoded in the + * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence, + * once we have successfully collected the localeconv() results, we will + * convert them from that codeset to the desired server encoding. * - * http://msdn.microsoft.com/en-us/library/x99tb11d.aspx - * - * Evidently, LC_CTYPE allows us to control the encoding used for strings - * returned by localeconv(). The Open Group standard, mentioned at the - * top of this C file, doesn't explicitly state this. - * - * Therefore, we set LC_CTYPE to match LC_NUMERIC or LC_MONETARY (which - * cannot be UTF8), call localeconv(), and then convert from the - * numeric/monetary LC_CTYPE to the server encoding. One example use of - * this is for the Euro symbol. - * - * Perhaps someday we will use GetLocaleInfoW() which returns values in - * UTF16 and convert from that. + * Windows, of course, resolutely does things its own way; on that + * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane + * results. Hence, we must temporarily set that category as well. */ - /* save user's value of ctype locale */ + /* Save prevailing value of ctype locale */ save_lc_ctype = setlocale(LC_CTYPE, NULL); - if (save_lc_ctype) - save_lc_ctype = pstrdup(save_lc_ctype); + if (!save_lc_ctype) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_ctype = pstrdup(save_lc_ctype); /* Here begins the critical section where we must not throw error */ @@ -597,27 +593,22 @@ PGLC_localeconv(void) worklconv.p_sign_posn = extlconv->p_sign_posn; worklconv.n_sign_posn = extlconv->n_sign_posn; - /* Try to restore internal settings */ - if (save_lc_monetary) - { - if (!setlocale(LC_MONETARY, save_lc_monetary)) - trouble = true; - } - - if (save_lc_numeric) - { - if (!setlocale(LC_NUMERIC, save_lc_numeric)) - trouble = true; - } - + /* + * Restore the prevailing locale settings; failure to do so is fatal. + * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC, + * but proceeding with the wrong value of LC_CTYPE would certainly be bad + * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC + * are almost certainly "C", there's really no reason that restoring those + * should fail. + */ #ifdef WIN32 - /* Try to restore internal ctype settings */ - if (save_lc_ctype) - { - if (!setlocale(LC_CTYPE, save_lc_ctype)) - trouble = true; - } + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif + if (!setlocale(LC_MONETARY, save_lc_monetary)) + elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary); + if (!setlocale(LC_NUMERIC, save_lc_numeric)) + elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric); /* * At this point we've done our best to clean up, and can call functions @@ -628,21 +619,11 @@ PGLC_localeconv(void) { int encoding; - /* - * Report it if we failed to restore anything. Perhaps this should be - * FATAL, rather than continuing with bad locale settings? - */ - if (trouble) - elog(WARNING, "failed to restore old locale"); - /* Release the pstrdup'd locale names */ - if (save_lc_monetary) - pfree(save_lc_monetary); - if (save_lc_numeric) - pfree(save_lc_numeric); + pfree(save_lc_monetary); + pfree(save_lc_numeric); #ifdef WIN32 - if (save_lc_ctype) - pfree(save_lc_ctype); + pfree(save_lc_ctype); #endif /* If any of the preceding strdup calls failed, complain now. */ @@ -653,15 +634,22 @@ PGLC_localeconv(void) /* * Now we must perform encoding conversion from whatever's associated - * with the locale into the database encoding. + * with the locales into the database encoding. If we can't identify + * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1), + * use PG_SQL_ASCII, which will result in just validating that the + * strings are OK in the database encoding. */ encoding = pg_get_encoding_from_locale(locale_numeric, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; db_encoding_convert(encoding, &worklconv.decimal_point); db_encoding_convert(encoding, &worklconv.thousands_sep); /* grouping is not text and does not require conversion */ encoding = pg_get_encoding_from_locale(locale_monetary, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; db_encoding_convert(encoding, &worklconv.int_curr_symbol); db_encoding_convert(encoding, &worklconv.currency_symbol); @@ -689,15 +677,15 @@ PGLC_localeconv(void) #ifdef WIN32 /* - * On WIN32, strftime() returns the encoding in CP_ACP (the default - * operating system codpage for that computer), which is likely different + * On Windows, strftime() returns its output in encoding CP_ACP (the default + * operating system codepage for the computer), which is likely different * from SERVER_ENCODING. This is especially important in Japanese versions * of Windows which will use SJIS encoding, which we don't support as a * server encoding. * * So, instead of using strftime(), use wcsftime() to return the value in - * wide characters (internally UTF16) and then convert it to the appropriate - * database encoding. + * wide characters (internally UTF16) and then convert to UTF8, which we + * know how to handle directly. * * Note that this only affects the calls to strftime() in this file, which are * used to get the locale-aware strings. Other parts of the backend use @@ -708,10 +696,13 @@ strftime_win32(char *dst, size_t dstlen, const char *format, const struct tm *tm) { size_t len; - wchar_t wformat[8]; /* formats used below need 3 bytes */ + wchar_t wformat[8]; /* formats used below need 3 chars */ wchar_t wbuf[MAX_L10N_DATA]; - /* get a wchar_t version of the format string */ + /* + * Get a wchar_t version of the format string. We only actually use + * plain-ASCII formats in this file, so we can say that they're UTF8. + */ len = MultiByteToWideChar(CP_UTF8, 0, format, -1, wformat, lengthof(wformat)); if (len == 0) @@ -722,7 +713,7 @@ strftime_win32(char *dst, size_t dstlen, if (len == 0) { /* - * strftime failed, possibly because the result would not fit in + * wcsftime failed, possibly because the result would not fit in * MAX_L10N_DATA. Return 0 with the contents of dst unspecified. */ return 0; @@ -735,17 +726,6 @@ strftime_win32(char *dst, size_t dstlen, GetLastError()); dst[len] = '\0'; - if (GetDatabaseEncoding() != PG_UTF8) - { - char *convstr = pg_any_to_server(dst, len, PG_UTF8); - - if (convstr != dst) - { - strlcpy(dst, convstr, dstlen); - len = strlen(dst); - pfree(convstr); - } - } return len; } @@ -754,29 +734,29 @@ strftime_win32(char *dst, size_t dstlen, #define strftime(a,b,c,d) strftime_win32(a,b,c,d) #endif /* WIN32 */ -/* Subroutine for cache_locale_time(). */ +/* + * Subroutine for cache_locale_time(). + * Convert the given string from encoding "encoding" to the database + * encoding, and store the result at *dst, replacing any previous value. + */ static void -cache_single_time(char **dst, const char *format, const struct tm *tm) +cache_single_string(char **dst, const char *src, int encoding) { - char buf[MAX_L10N_DATA]; char *ptr; + char *olddst; - /* - * MAX_L10N_DATA is sufficient buffer space for every known locale, and - * POSIX defines no strftime() errors. (Buffer space exhaustion is not an - * error.) An implementation might report errors (e.g. ENOMEM) by - * returning 0 (or, less plausibly, a negative value) and setting errno. - * Report errno just in case the implementation did that, but clear it in - * advance of the call so we don't emit a stale, unrelated errno. - */ - errno = 0; - if (strftime(buf, MAX_L10N_DATA, format, tm) <= 0) - elog(ERROR, "strftime(%s) failed: %m", format); + /* Convert the string to the database encoding, or validate it's OK */ + ptr = pg_any_to_server(src, strlen(src), encoding); - ptr = MemoryContextStrdup(TopMemoryContext, buf); - if (*dst) - pfree(*dst); - *dst = ptr; + /* Store the string in long-lived storage, replacing any previous value */ + olddst = *dst; + *dst = MemoryContextStrdup(TopMemoryContext, ptr); + if (olddst) + pfree(olddst); + + /* Might as well clean up any palloc'd conversion result, too */ + if (ptr != src) + pfree(ptr); } /* @@ -785,11 +765,14 @@ cache_single_time(char **dst, const char *format, const struct tm *tm) void cache_locale_time(void) { - char *save_lc_time; + char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA]; + char *bufptr; time_t timenow; struct tm *timeinfo; + bool strftimefail = false; + int encoding; int i; - + char *save_lc_time; #ifdef WIN32 char *save_lc_ctype; #endif @@ -800,26 +783,33 @@ cache_locale_time(void) elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); - /* save user's value of time locale */ + /* + * As in PGLC_localeconv(), it's critical that we not throw error while + * libc's locale settings have nondefault values. Hence, we just call + * strftime() within the critical section, and then convert and save its + * results afterwards. + */ + + /* Save prevailing value of time locale */ save_lc_time = setlocale(LC_TIME, NULL); - if (save_lc_time) - save_lc_time = pstrdup(save_lc_time); + if (!save_lc_time) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_time = pstrdup(save_lc_time); #ifdef WIN32 /* - * On WIN32, there is no way to get locale-specific time values in a - * specified locale, like we do for monetary/numeric. We can only get - * CP_ACP (see strftime_win32) or UTF16. Therefore, we get UTF16 and - * convert it to the database locale. However, wcsftime() internally uses - * LC_CTYPE, so we set it here. See the WIN32 comment near the top of - * PGLC_localeconv(). + * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we + * must set it here. This code looks the same as what PGLC_localeconv() + * does, but the underlying reason is different: this does NOT determine + * the encoding we'll get back from strftime_win32(). */ - /* save user's value of ctype locale */ + /* Save prevailing value of ctype locale */ save_lc_ctype = setlocale(LC_CTYPE, NULL); - if (save_lc_ctype) - save_lc_ctype = pstrdup(save_lc_ctype); + if (!save_lc_ctype) + elog(ERROR, "setlocale(NULL) failed"); + save_lc_ctype = pstrdup(save_lc_ctype); /* use lc_time to set the ctype */ setlocale(LC_CTYPE, locale_time); @@ -827,15 +817,33 @@ cache_locale_time(void) setlocale(LC_TIME, locale_time); + /* We use times close to current time as data for strftime(). */ timenow = time(NULL); timeinfo = localtime(&timenow); + /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */ + bufptr = buf; + + /* + * MAX_L10N_DATA is sufficient buffer space for every known locale, and + * POSIX defines no strftime() errors. (Buffer space exhaustion is not an + * error.) An implementation might report errors (e.g. ENOMEM) by + * returning 0 (or, less plausibly, a negative value) and setting errno. + * Report errno just in case the implementation did that, but clear it in + * advance of the calls so we don't emit a stale, unrelated errno. + */ + errno = 0; + /* localized days */ for (i = 0; i < 7; i++) { timeinfo->tm_wday = i; - cache_single_time(&localized_abbrev_days[i], "%a", timeinfo); - cache_single_time(&localized_full_days[i], "%A", timeinfo); + if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; } /* localized months */ @@ -843,27 +851,78 @@ cache_locale_time(void) { timeinfo->tm_mon = i; timeinfo->tm_mday = 1; /* make sure we don't have invalid date */ - cache_single_time(&localized_abbrev_months[i], "%b", timeinfo); - cache_single_time(&localized_full_months[i], "%B", timeinfo); - } - - /* try to restore internal settings */ - if (save_lc_time) - { - if (!setlocale(LC_TIME, save_lc_time)) - elog(WARNING, "failed to restore old locale"); - pfree(save_lc_time); + if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; + if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0) + strftimefail = true; + bufptr += MAX_L10N_DATA; } + /* + * Restore the prevailing locale settings; as in PGLC_localeconv(), + * failure to do so is fatal. + */ #ifdef WIN32 - /* try to restore internal ctype settings */ - if (save_lc_ctype) - { - if (!setlocale(LC_CTYPE, save_lc_ctype)) - elog(WARNING, "failed to restore old locale"); - pfree(save_lc_ctype); - } + if (!setlocale(LC_CTYPE, save_lc_ctype)) + elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype); #endif + if (!setlocale(LC_TIME, save_lc_time)) + elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time); + + /* + * At this point we've done our best to clean up, and can throw errors, or + * call functions that might throw errors, with a clean conscience. + */ + if (strftimefail) + elog(ERROR, "strftime() failed: %m"); + + /* Release the pstrdup'd locale names */ + pfree(save_lc_time); +#ifdef WIN32 + pfree(save_lc_ctype); +#endif + +#ifndef WIN32 + + /* + * As in PGLC_localeconv(), we must convert strftime()'s output from the + * encoding implied by LC_TIME to the database encoding. If we can't + * identify the LC_TIME encoding, just perform encoding validation. + */ + encoding = pg_get_encoding_from_locale(locale_time, true); + if (encoding < 0) + encoding = PG_SQL_ASCII; + +#else + + /* + * On Windows, strftime_win32() always returns UTF8 data, so convert from + * that if necessary. + */ + encoding = PG_UTF8; + +#endif /* WIN32 */ + + bufptr = buf; + + /* localized days */ + for (i = 0; i < 7; i++) + { + cache_single_string(&localized_abbrev_days[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + cache_single_string(&localized_full_days[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + } + + /* localized months */ + for (i = 0; i < 12; i++) + { + cache_single_string(&localized_abbrev_months[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + cache_single_string(&localized_full_months[i], bufptr, encoding); + bufptr += MAX_L10N_DATA; + } CurrentLCTimeValid = true; } diff --git a/src/test/regress/expected/collate.linux.utf8.out b/src/test/regress/expected/collate.linux.utf8.out index 400a747cdc..44dcffd41e 100644 --- a/src/test/regress/expected/collate.linux.utf8.out +++ b/src/test/regress/expected/collate.linux.utf8.out @@ -430,6 +430,18 @@ SELECT relname FROM pg_class WHERE relname ~* '^abc'; -- to_char SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-02-01', 'DD TMMON YYYY'); + to_char +------------- + 01 ŞUB 2010 +(1 row) + +SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR"); + to_char +------------- + 01 ŞUB 2010 +(1 row) + SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); to_char ------------- diff --git a/src/test/regress/sql/collate.linux.utf8.sql b/src/test/regress/sql/collate.linux.utf8.sql index b51162e3a1..45256666bc 100644 --- a/src/test/regress/sql/collate.linux.utf8.sql +++ b/src/test/regress/sql/collate.linux.utf8.sql @@ -169,6 +169,8 @@ SELECT relname FROM pg_class WHERE relname ~* '^abc'; -- to_char SET lc_time TO 'tr_TR'; +SELECT to_char(date '2010-02-01', 'DD TMMON YYYY'); +SELECT to_char(date '2010-02-01', 'DD TMMON YYYY' COLLATE "tr_TR"); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY'); SELECT to_char(date '2010-04-01', 'DD TMMON YYYY' COLLATE "tr_TR");