From 07a560673515e6de19010d89549cefdf7544bd63 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 19 May 2008 18:08:16 +0000 Subject: [PATCH] Make to_char()'s localized month/day names depend on LC_TIME, not LC_MESSAGES. Euler Taveira de Oliveira --- doc/src/sgml/config.sgml | 8 +- doc/src/sgml/func.sgml | 5 +- src/backend/utils/adt/formatting.c | 399 +++++++++----------------- src/backend/utils/adt/oracle_compat.c | 8 +- src/backend/utils/adt/pg_locale.c | 93 +++++- src/include/utils/pg_locale.h | 12 +- 6 files changed, 251 insertions(+), 274 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 1961d28893..490e3ab004 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -4184,9 +4184,9 @@ SET XML OPTION { DOCUMENT | CONTENT }; - Sets the locale to use for formatting date and time values. - (Currently, this setting does nothing, but it might in the - future.) Acceptable values are system-dependent; see to_char family of + functions. Acceptable values are system-dependent; see for more information. 
If this variable is set to the empty string (which is the default) then the value is inherited from the execution environment of the server in a diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index d168891d4e..48cb4f6c2b 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -1,4 +1,4 @@ - + Functions and Operators @@ -5079,7 +5079,8 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})'); TM prefix - translation mode (print localized day and month names based on lc_messages) + translation mode (print localized day and month names based on + <xref linkend="guc-lc-time">) TMMonth diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 103543817d..a8d63180dc 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- * formatting.c * - * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.139 2008/03/25 22:42:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.140 2008/05/19 18:08:15 tgl Exp $ * * * Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group @@ -75,7 +75,17 @@ #include #include #include -#include + +/* + * towlower() and friends should be in <wctype.h>, but some pre-C99 systems + * declare them in <wchar.h>.
+ */ +#ifdef HAVE_WCHAR_H +#include <wchar.h> +#endif +#ifdef HAVE_WCTYPE_H +#include <wctype.h> +#endif #include "utils/builtins.h" #include "utils/date.h" @@ -86,8 +96,6 @@ #include "utils/pg_locale.h" #include "mb/pg_wchar.h" -#define _(x) gettext((x)) - /* ---------- * Routines type * ---------- @@ -919,6 +927,7 @@ static int strspace_len(char *str); static int strdigits_len(char *str); static char *str_toupper(char *buff); static char *str_tolower(char *buff); +static char *str_initcap(char *buff); static int seq_search(char *name, char **array, int type, int max, int *len); static void do_to_timestamp(text *date_txt, text *fmt, @@ -939,22 +948,13 @@ static NUMCacheEntry *NUM_cache_search(char *str); static NUMCacheEntry *NUM_cache_getnew(char *str); static void NUM_cache_remove(NUMCacheEntry *ent); -static char *localize_month_full(int index); -static char *localize_month(int index); -static char *localize_day_full(int index); -static char *localize_day(int index); - #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER) #define USE_WIDE_UPPER_LOWER /* externs are in oracle_compat.c */ extern char *wstring_upper(char *str); extern char *wstring_lower(char *str); - -static char *localized_str_toupper(char *buff); -static char *localized_str_tolower(char *buff); -#else -#define localized_str_toupper str_toupper -#define localized_str_tolower str_tolower +extern wchar_t *texttowcs(const text *txt); +extern text *wcstotext(const wchar_t *str, int ncodes); #endif /* ---------- @@ -1426,103 +1426,123 @@ str_numth(char *dest, char *num, int type) } /* ---------- - * Convert string to upper case. Input string is modified in place. + * Convert string to upper case. It is designed to be multibyte-aware.
* ---------- */ static char * str_toupper(char *buff) { - char *p_buff = buff; + char *result; if (!buff) return NULL; - while (*p_buff) +#ifdef USE_WIDE_UPPER_LOWER + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + result = wstring_upper(buff); + else +#endif /* USE_WIDE_UPPER_LOWER */ { - *p_buff = pg_toupper((unsigned char) *p_buff); - ++p_buff; + char *p; + + result = pstrdup(buff); + + for (p = result; *p; p++) + *p = pg_toupper((unsigned char) *p); } - return buff; + return result; } /* ---------- - * Convert string to lower case. Input string is modified in place. + * Convert string to lower case. It is designed to be multibyte-aware. * ---------- */ static char * str_tolower(char *buff) { - char *p_buff = buff; + char *result; if (!buff) return NULL; - while (*p_buff) - { - *p_buff = pg_tolower((unsigned char) *p_buff); - ++p_buff; - } - return buff; -} - - #ifdef USE_WIDE_UPPER_LOWER + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + result = wstring_lower(buff); + else +#endif /* USE_WIDE_UPPER_LOWER */ + { + char *p; + + result = pstrdup(buff); + + for (p = result; *p; p++) + *p = pg_tolower((unsigned char) *p); + } + + return result; +} + /* ---------- - * Convert localized string to upper case. - * Input string may be modified in place ... or we might make a copy. 
+ * wide-character-aware initcap function * ---------- */ static char * -localized_str_toupper(char *buff) +str_initcap(char *buff) { + char *result; + bool wasalnum = false; + if (!buff) return NULL; +#ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - return wstring_upper(buff); - else { - char *p_buff = buff; + wchar_t *workspace; + text *in_text; + text *out_text; + int i; - while (*p_buff) + in_text = cstring_to_text(buff); + workspace = texttowcs(in_text); + + for (i = 0; workspace[i] != 0; i++) { - *p_buff = pg_toupper((unsigned char) *p_buff); - ++p_buff; + if (wasalnum) + workspace[i] = towlower(workspace[i]); + else + workspace[i] = towupper(workspace[i]); + wasalnum = iswalnum(workspace[i]); + } + + out_text = wcstotext(workspace, i); + result = text_to_cstring(out_text); + + pfree(workspace); + pfree(in_text); + pfree(out_text); + } + else +#endif /* USE_WIDE_UPPER_LOWER */ + { + char *p; + + result = pstrdup(buff); + + for (p = result; *p; p++) + { + if (wasalnum) + *p = pg_tolower((unsigned char) *p); + else + *p = pg_toupper((unsigned char) *p); + wasalnum = isalnum((unsigned char) *p); } } - return buff; + return result; } -/* ---------- - * Convert localized string to lower case. - * Input string may be modified in place ... or we might make a copy. 
- * ---------- - */ -static char * -localized_str_tolower(char *buff) -{ - if (!buff) - return NULL; - - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - return wstring_lower(buff); - else - { - char *p_buff = buff; - - while (*p_buff) - { - *p_buff = pg_tolower((unsigned char) *p_buff); - ++p_buff; - } - } - - return buff; -} -#endif /* USE_WIDE_UPPER_LOWER */ - /* ---------- * Sequential search with to upper/lower conversion * ---------- @@ -1730,6 +1750,9 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) workbuff[32]; int i; + /* cache localized days and months */ + cache_locale_time(); + s = out; for (n = node; n->type != NODE_TYPE_END; n++) { @@ -1872,8 +1895,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) break; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_month_full(tm->tm_mon - 1)); - sprintf(s, "%*s", 0, localized_str_toupper(workbuff)); + strcpy(workbuff, localized_full_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_toupper(workbuff)); } else { @@ -1887,9 +1910,14 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - sprintf(s, "%*s", 0, localize_month_full(tm->tm_mon - 1)); + { + strcpy(workbuff, localized_full_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_initcap(workbuff)); + } else + { sprintf(s, "%*s", S_FM(n->suffix) ? 
0 : -9, months_full[tm->tm_mon - 1]); + } s += strlen(s); break; case DCH_month: @@ -1898,8 +1926,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) break; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_month_full(tm->tm_mon - 1)); - sprintf(s, "%*s", 0, localized_str_tolower(workbuff)); + strcpy(workbuff, localized_full_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_tolower(workbuff)); } else { @@ -1914,13 +1942,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) break; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_month(tm->tm_mon - 1)); - strcpy(s, localized_str_toupper(workbuff)); + strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_toupper(workbuff)); } else { - strcpy(s, months[tm->tm_mon - 1]); - str_toupper(s); + strcpy(workbuff, months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_toupper(workbuff)); } s += strlen(s); break; @@ -1929,9 +1957,14 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, localize_month(tm->tm_mon - 1)); + { + strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_initcap(workbuff)); + } else + { strcpy(s, months[tm->tm_mon - 1]); + } s += strlen(s); break; case DCH_mon: @@ -1940,8 +1973,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) break; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_month(tm->tm_mon - 1)); - strcpy(s, localized_str_tolower(workbuff)); + strcpy(workbuff, localized_abbrev_months[tm->tm_mon - 1]); + sprintf(s, "%*s", 0, str_tolower(workbuff)); } else { @@ -1960,8 +1993,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_day_full(tm->tm_wday)); - sprintf(s, "%*s", 0, localized_str_toupper(workbuff)); + strcpy(workbuff, localized_full_days[tm->tm_wday]); + sprintf(s, "%*s", 
0, str_toupper(workbuff)); } else { @@ -1973,17 +2006,22 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_Day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - sprintf(s, "%*s", 0, localize_day_full(tm->tm_wday)); + { + strcpy(workbuff, localized_full_days[tm->tm_wday]); + sprintf(s, "%*s", 0, str_initcap(workbuff)); + } else + { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); + } s += strlen(s); break; case DCH_day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_day_full(tm->tm_wday)); - sprintf(s, "%*s", 0, localized_str_tolower(workbuff)); + strcpy(workbuff, localized_full_days[tm->tm_wday]); + sprintf(s, "%*s", 0, str_tolower(workbuff)); } else { @@ -1996,30 +2034,35 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_day(tm->tm_wday)); - strcpy(s, localized_str_toupper(workbuff)); + strcpy(workbuff, localized_abbrev_days[tm->tm_wday]); + sprintf(s, "%*s", 0, str_toupper(workbuff)); } else { - strcpy(s, days_short[tm->tm_wday]); - str_toupper(s); + strcpy(workbuff, days_short[tm->tm_wday]); + sprintf(s, "%*s", 0, str_toupper(workbuff)); } s += strlen(s); break; case DCH_Dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, localize_day(tm->tm_wday)); + { + strcpy(workbuff, localized_abbrev_days[tm->tm_wday]); + sprintf(s, "%*s", 0, str_initcap(workbuff)); + } else + { strcpy(s, days_short[tm->tm_wday]); + } s += strlen(s); break; case DCH_dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) { - strcpy(workbuff, localize_day(tm->tm_wday)); - strcpy(s, localized_str_tolower(workbuff)); + strcpy(workbuff, localized_abbrev_days[tm->tm_wday]); + sprintf(s, "%*s", 0, str_tolower(workbuff)); } else { @@ -2783,174 +2826,6 @@ datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval) return res; } -static char * -localize_month_full(int index) -{ - char *m = NULL; - - switch (index) - { - case 
0: - m = _("January"); - break; - case 1: - m = _("February"); - break; - case 2: - m = _("March"); - break; - case 3: - m = _("April"); - break; - case 4: - m = _("May"); - break; - case 5: - m = _("June"); - break; - case 6: - m = _("July"); - break; - case 7: - m = _("August"); - break; - case 8: - m = _("September"); - break; - case 9: - m = _("October"); - break; - case 10: - m = _("November"); - break; - case 11: - m = _("December"); - break; - } - - return m; -} - -static char * -localize_month(int index) -{ - char *m = NULL; - - switch (index) - { - case 0: - m = _("Jan"); - break; - case 1: - m = _("Feb"); - break; - case 2: - m = _("Mar"); - break; - case 3: - m = _("Apr"); - break; - case 4: - /*------ - translator: Translate this as the abbreviation of "May". - In English, it is both the full month name and the - abbreviation, so this hack is needed to distinguish - them. The translation also needs to start with S:, - which will be stripped at run time. */ - m = _("S:May") + 2; - break; - case 5: - m = _("Jun"); - break; - case 6: - m = _("Jul"); - break; - case 7: - m = _("Aug"); - break; - case 8: - m = _("Sep"); - break; - case 9: - m = _("Oct"); - break; - case 10: - m = _("Nov"); - break; - case 11: - m = _("Dec"); - break; - } - - return m; -} - -static char * -localize_day_full(int index) -{ - char *d = NULL; - - switch (index) - { - case 0: - d = _("Sunday"); - break; - case 1: - d = _("Monday"); - break; - case 2: - d = _("Tuesday"); - break; - case 3: - d = _("Wednesday"); - break; - case 4: - d = _("Thursday"); - break; - case 5: - d = _("Friday"); - break; - case 6: - d = _("Saturday"); - break; - } - - return d; -} - -static char * -localize_day(int index) -{ - char *d = NULL; - - switch (index) - { - case 0: - d = _("Sun"); - break; - case 1: - d = _("Mon"); - break; - case 2: - d = _("Tue"); - break; - case 3: - d = _("Wed"); - break; - case 4: - d = _("Thu"); - break; - case 5: - d = _("Fri"); - break; - case 6: - d = _("Sat"); - break; 
- } - - return d; -} - /**************************************************************************** * Public routines ***************************************************************************/ diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 257f60a8bb..532015bc00 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.78 2008/03/25 22:42:44 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.79 2008/05/19 18:08:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,6 +48,8 @@ #define USE_WIDE_UPPER_LOWER char *wstring_lower(char *str); char *wstring_upper(char *str); +wchar_t *texttowcs(const text *txt); +text *wcstotext(const wchar_t *str, int ncodes); #endif static text *dotrim(const char *string, int stringlen, @@ -60,7 +62,7 @@ static text *dotrim(const char *string, int stringlen, /* * Convert a TEXT value into a palloc'd wchar string. */ -static wchar_t * +wchar_t * texttowcs(const text *txt) { int nbytes = VARSIZE_ANY_EXHDR(txt); @@ -112,7 +114,7 @@ texttowcs(const text *txt) * must be zero-terminated, but we also require the caller to pass the string * length, since it will know it anyway in current uses. 
*/ -static text * +text * wcstotext(const wchar_t *str, int ncodes) { text *result; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 9072cb54f2..dfc6c886a5 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 2002-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.40 2008/01/01 19:45:52 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.41 2008/05/19 18:08:16 tgl Exp $ * *----------------------------------------------------------------------- */ @@ -48,20 +48,31 @@ #include "postgres.h" #include <locale.h> +#include <time.h> #include "catalog/pg_control.h" +#include "utils/memutils.h" #include "utils/pg_locale.h" -/* GUC storage area */ +#define MAX_L10N_DATA 80 + +/* GUC settings */ char *locale_messages; char *locale_monetary; char *locale_numeric; char *locale_time; +/* lc_time localization cache */ +char *localized_abbrev_days[7]; +char *localized_full_days[7]; +char *localized_abbrev_months[12]; +char *localized_full_months[12]; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; +static bool CurrentLCTimeValid = false; /* Environment variable storage area */ @@ -209,7 +220,10 @@ locale_xxx_assign(int category, const char *value, bool doit, GucSource source) /* need to reload cache next time? */ if (doit && value != NULL) + { CurrentLocaleConvValid = false; + CurrentLCTimeValid = false; + } return value; } @@ -424,3 +438,78 @@ PGLC_localeconv(void) CurrentLocaleConvValid = true; return &CurrentLocaleConv; } + + +/* + * Update the lc_time localization cache variables if needed. + */ +void +cache_locale_time(void) +{ + char *save_lc_time; + time_t timenow; + struct tm *timeinfo; + char buf[MAX_L10N_DATA]; + char *ptr; + int i; + + /* did we do this already?
*/ + if (CurrentLCTimeValid) + return; + + elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time); + + /* set user's value of time locale */ + save_lc_time = setlocale(LC_TIME, NULL); + if (save_lc_time) + save_lc_time = pstrdup(save_lc_time); + + setlocale(LC_TIME, locale_time); + + timenow = time(NULL); + timeinfo = localtime(&timenow); + + /* localized days */ + for (i = 0; i < 7; i++) + { + timeinfo->tm_wday = i; + strftime(buf, MAX_L10N_DATA, "%a", timeinfo); + ptr = MemoryContextStrdup(TopMemoryContext, buf); + if (localized_abbrev_days[i]) + pfree(localized_abbrev_days[i]); + localized_abbrev_days[i] = ptr; + + strftime(buf, MAX_L10N_DATA, "%A", timeinfo); + ptr = MemoryContextStrdup(TopMemoryContext, buf); + if (localized_full_days[i]) + pfree(localized_full_days[i]); + localized_full_days[i] = ptr; + } + + /* localized months */ + for (i = 0; i < 12; i++) + { + timeinfo->tm_mon = i; + timeinfo->tm_mday = 1; /* make sure we don't have invalid date */ + strftime(buf, MAX_L10N_DATA, "%b", timeinfo); + ptr = MemoryContextStrdup(TopMemoryContext, buf); + if (localized_abbrev_months[i]) + pfree(localized_abbrev_months[i]); + localized_abbrev_months[i] = ptr; + + strftime(buf, MAX_L10N_DATA, "%B", timeinfo); + ptr = MemoryContextStrdup(TopMemoryContext, buf); + if (localized_full_months[i]) + pfree(localized_full_months[i]); + localized_full_months[i] = ptr; + } + + /* try to restore internal settings */ + if (save_lc_time) + { + setlocale(LC_TIME, save_lc_time); + pfree(save_lc_time); + } + + CurrentLCTimeValid = true; +} diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index da30b06616..2dee75524c 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -2,7 +2,7 @@ * * PostgreSQL locale utilities * - * $PostgreSQL: pgsql/src/include/utils/pg_locale.h,v 1.24 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/pg_locale.h,v 1.25 2008/05/19 18:08:16 tgl Exp $ * * 
Copyright (c) 2002-2008, PostgreSQL Global Development Group * @@ -17,11 +17,19 @@ #include "utils/guc.h" +/* GUC settings */ extern char *locale_messages; extern char *locale_monetary; extern char *locale_numeric; extern char *locale_time; +/* lc_time localization cache */ +extern char *localized_abbrev_days[]; +extern char *localized_full_days[]; +extern char *localized_abbrev_months[]; +extern char *localized_full_months[]; + + extern const char *locale_messages_assign(const char *value, bool doit, GucSource source); extern const char *locale_monetary_assign(const char *value, @@ -42,4 +50,6 @@ extern bool lc_ctype_is_c(void); */ extern struct lconv *PGLC_localeconv(void); +extern void cache_locale_time(void); + #endif /* _PG_LOCALE_ */