From f6ec7430f920991e417383c154f9c38c04a992b7 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Mon, 23 Jun 2008 19:27:19 +0000 Subject: [PATCH] Merge duplicate upper/lower/initcap() routines in oracle_compat.c and formatting.c to use common code; remove duplicate functions and support routines that are no longer needed. --- src/backend/utils/adt/formatting.c | 210 +++++++----- src/backend/utils/adt/oracle_compat.c | 445 ++------------------------ src/include/utils/formatting.h | 6 +- 3 files changed, 165 insertions(+), 496 deletions(-) diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 4e6ad070e7..c435d4ba83 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- * formatting.c * - * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.142 2008/06/17 16:09:06 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.143 2008/06/23 19:27:19 momjian Exp $ * * * Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group @@ -925,9 +925,6 @@ static char *get_th(char *num, int type); static char *str_numth(char *dest, char *num, int type); static int strspace_len(char *str); static int strdigits_len(char *str); -static char *str_toupper(char *buff); -static char *str_tolower(char *buff); -static char *str_initcap(char *buff); static int seq_search(char *name, char **array, int type, int max, int *len); static void do_to_timestamp(text *date_txt, text *fmt, @@ -1424,12 +1421,24 @@ str_numth(char *dest, char *num, int type) return dest; } +/* + * If the system provides the needed functions for wide-character manipulation + * (which are all standardized by C99), then we implement upper/lower/initcap + * using wide-character functions, if necessary. Otherwise we use the + * traditional functions, which of course will not work as desired + * in multibyte character sets. Note that in either case we are effectively + * assuming that the database character encoding matches the encoding implied + * by LC_CTYPE. + */ + /* ---------- - * Convert string to upper case. It is designed to be multibyte-aware. + * wide-character-aware lower function + * We pass the number of bytes so we can pass varlena and char* + * to this function. * ---------- */ -static char * -str_toupper(char *buff) +char * +str_tolower(char *buff, size_t nbytes) { char *result; @@ -1438,13 +1447,78 @@ str_toupper(char *buff) #ifdef USE_WIDE_UPPER_LOWER if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - result = wstring_upper(buff); + { + wchar_t *workspace; + int curr_char = 0; + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes + 1); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + workspace[curr_char] = towlower(workspace[curr_char]); + + /* Make result large enough; case change might change number of bytes */ + result = palloc(curr_char * MB_CUR_MAX + 1); + + wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); + pfree(workspace); + } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; - result = pstrdup(buff); + result = pnstrdup(buff, nbytes); + + for (p = result; *p; p++) + *p = pg_tolower((unsigned char) *p); + } + + return result; +} + +/* ---------- + * wide-character-aware upper function + * We pass the number of bytes so we can pass varlena and char* + * to this function. + * ---------- + */ +char * +str_toupper(char *buff, size_t nbytes) +{ + char *result; + + if (!buff) + return NULL; + +#ifdef USE_WIDE_UPPER_LOWER + if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) + { + wchar_t *workspace; + int curr_char = 0; + + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); + + char2wchar(workspace, nbytes + 1, buff, nbytes + 1); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) + workspace[curr_char] = towupper(workspace[curr_char]); + + /* Make result large enough; case change might change number of bytes */ + result = palloc(curr_char * MB_CUR_MAX + 1); + + wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); + pfree(workspace); + } + else +#endif /* USE_WIDE_UPPER_LOWER */ + { + char *p; + + result = pnstrdup(buff, nbytes); for (p = result; *p; p++) *p = pg_toupper((unsigned char) *p); @@ -1453,41 +1527,14 @@ str_toupper(char *buff) return result; } -/* ---------- - * Convert string to lower case. It is designed to be multibyte-aware. - * ---------- - */ -static char * -str_tolower(char *buff) -{ - char *result; - - if (!buff) - return NULL; - -#ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - result = wstring_lower(buff); - else -#endif /* USE_WIDE_UPPER_LOWER */ - { - char *p; - - result = pstrdup(buff); - - for (p = result; *p; p++) - *p = pg_tolower((unsigned char) *p); - } - - return result; -} - /* ---------- * wide-character-aware initcap function + * We pass the number of bytes so we can pass varlena and char* + * to this function. * ---------- */ -static char * -str_initcap(char *buff) +char * +str_initcap(char *buff, size_t nbytes) { char *result; bool wasalnum = false; @@ -1499,35 +1546,34 @@ str_initcap(char *buff) if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { wchar_t *workspace; - text *in_text; - text *out_text; - int i; + int curr_char = 0; - in_text = cstring_to_text(buff); - workspace = texttowcs(in_text); + /* Output workspace cannot have more codes than input bytes */ + workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - for (i = 0; workspace[i] != 0; i++) + char2wchar(workspace, nbytes + 1, buff, nbytes + 1); + + for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (wasalnum) - workspace[i] = towlower(workspace[i]); + workspace[curr_char] = towlower(workspace[curr_char]); else - workspace[i] = towupper(workspace[i]); - wasalnum = iswalnum(workspace[i]); + workspace[curr_char] = towupper(workspace[curr_char]); + wasalnum = iswalnum(workspace[curr_char]); } - out_text = wcstotext(workspace, i); - result = text_to_cstring(out_text); + /* Make result large enough; case change might change number of bytes */ + result = palloc(curr_char * MB_CUR_MAX + 1); + wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1); pfree(workspace); - pfree(in_text); - pfree(out_text); } else #endif /* USE_WIDE_UPPER_LOWER */ { char *p; - result = pstrdup(buff); + result = pnstrdup(buff, nbytes); for (p = result; *p; p++) { @@ -1851,7 +1897,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) { char *p = pstrdup(tmtcTzn(in)); - strcpy(s, str_tolower(p)); + strcpy(s, str_tolower(p, strlen(p))); pfree(p); s += strlen(s); } @@ -1893,11 +1939,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1], + strlen(localized_full_months[tm->tm_mon - 1]))); else { strcpy(workbuff, months_full[tm->tm_mon - 1]); - sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff)); + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + str_toupper(workbuff, strlen(workbuff))); } s += strlen(s); break; @@ -1906,7 +1954,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1], + strlen(localized_full_months[tm->tm_mon - 1]))); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); s += strlen(s); @@ -1916,7 +1965,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1])); + strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1], + strlen(localized_full_months[tm->tm_mon - 1]))); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]); @@ -1929,9 +1979,11 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1], + strlen(localized_abbrev_months[tm->tm_mon - 1]))); else - strcpy(s, str_toupper(months[tm->tm_mon - 1])); + strcpy(s, str_toupper(months[tm->tm_mon - 1], + strlen(months[tm->tm_mon - 1]))); s += strlen(s); break; case DCH_Mon: @@ -1939,7 +1991,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1], + strlen(localized_abbrev_months[tm->tm_mon - 1]))); else strcpy(s, months[tm->tm_mon - 1]); s += strlen(s); @@ -1949,7 +2002,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) if (!tm->tm_mon) break; if (S_TM(n->suffix)) - strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1])); + strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1], + strlen(localized_abbrev_months[tm->tm_mon - 1]))); else { strcpy(s, months[tm->tm_mon - 1]); @@ -1966,18 +2020,21 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_DAY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_toupper(localized_full_days[tm->tm_wday])); + strcpy(s, str_toupper(localized_full_days[tm->tm_wday], + strlen(localized_full_days[tm->tm_wday]))); else { strcpy(workbuff, days[tm->tm_wday]); - sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff)); + sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, + str_toupper(workbuff, strlen(workbuff))); } s += strlen(s); break; case DCH_Day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_initcap(localized_full_days[tm->tm_wday])); + strcpy(s, str_initcap(localized_full_days[tm->tm_wday], + strlen(localized_full_days[tm->tm_wday]))); else sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); s += strlen(s); @@ -1985,7 +2042,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_day: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_tolower(localized_full_days[tm->tm_wday])); + strcpy(s, str_tolower(localized_full_days[tm->tm_wday], + strlen(localized_full_days[tm->tm_wday]))); else { sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]); @@ -1996,15 +2054,18 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_DY: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday], + strlen(localized_abbrev_days[tm->tm_wday]))); else - strcpy(s, str_toupper(days_short[tm->tm_wday])); + strcpy(s, str_toupper(days_short[tm->tm_wday], + strlen(days_short[tm->tm_wday]))); s += strlen(s); break; case DCH_Dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday], + strlen(localized_abbrev_days[tm->tm_wday]))); else strcpy(s, days_short[tm->tm_wday]); s += strlen(s); @@ -2012,7 +2073,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out) case DCH_dy: INVALID_FOR_INTERVAL; if (S_TM(n->suffix)) - strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday])); + strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday], + strlen(localized_abbrev_days[tm->tm_wday]))); else { strcpy(s, days_short[tm->tm_wday]); @@ -4277,12 +4339,14 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number, case NUM_rn: if (IS_FILLMODE(Np->Num)) { - strcpy(Np->inout_p, str_tolower(Np->number_p)); + strcpy(Np->inout_p, str_tolower(Np->number_p, + strlen(Np->number_p))); Np->inout_p += strlen(Np->inout_p) - 1; } else { - sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p)); + sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p, + strlen(Np->number_p))); Np->inout_p += strlen(Np->inout_p) - 1; } break; diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 372ff83956..bf29e7d165 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.80 2008/06/17 16:09:06 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.81 2008/06/23 19:27:19 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -29,292 +29,16 @@ #endif #include "utils/builtins.h" +#include "utils/formatting.h" #include "utils/pg_locale.h" #include "mb/pg_wchar.h" -/* - * If the system provides the needed functions for wide-character manipulation - * (which are all standardized by C99), then we implement upper/lower/initcap - * using wide-character functions. Otherwise we use the traditional - * functions, which of course will not work as desired in multibyte character - * sets. Note that in either case we are effectively assuming that the - * database character encoding matches the encoding implied by LC_CTYPE. - */ -#ifdef USE_WIDE_UPPER_LOWER -char *wstring_lower(char *str); -char *wstring_upper(char *str); -wchar_t *texttowcs(const text *txt); -text *wcstotext(const wchar_t *str, int ncodes); -#endif - static text *dotrim(const char *string, int stringlen, const char *set, int setlen, bool doltrim, bool dortrim); -#ifdef USE_WIDE_UPPER_LOWER - -/* - * Convert a TEXT value into a palloc'd wchar string. - */ -wchar_t * -texttowcs(const text *txt) -{ - int nbytes = VARSIZE_ANY_EXHDR(txt); - char *workstr; - wchar_t *result; - size_t ncodes; - - /* Overflow paranoia */ - if (nbytes < 0 || - nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Need a null-terminated version of the input */ - workstr = text_to_cstring(txt); - - /* Output workspace cannot have more codes than input bytes */ - result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - /* Do the conversion */ - ncodes = mbstowcs(result, workstr, nbytes + 1); - - if (ncodes == (size_t) -1) - { - /* - * Invalid multibyte character encountered. We try to give a useful - * error message by letting pg_verifymbstr check the string. But it's - * possible that the string is OK to us, and not OK to mbstowcs --- - * this suggests that the LC_CTYPE locale is different from the - * database encoding. Give a generic error message if verifymbstr - * can't find anything wrong. - */ - pg_verifymbstr(workstr, nbytes, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - Assert(ncodes <= (size_t) nbytes); - - return result; -} - - -/* - * Convert a wchar string into a palloc'd TEXT value. The wchar string - * must be zero-terminated, but we also require the caller to pass the string - * length, since it will know it anyway in current uses. - */ -text * -wcstotext(const wchar_t *str, int ncodes) -{ - text *result; - size_t nbytes; - - /* Overflow paranoia */ - if (ncodes < 0 || - ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Make workspace certainly large enough for result */ - result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ); - - /* Do the conversion */ - nbytes = wcstombs((char *) VARDATA(result), str, - (ncodes + 1) * MB_CUR_MAX); - - if (nbytes == (size_t) -1) - { - /* Invalid multibyte character encountered ... shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"))); - } - - Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX)); - - SET_VARSIZE(result, nbytes + VARHDRSZ); - - return result; -} -#endif /* USE_WIDE_UPPER_LOWER */ - - -/* - * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. - * To make use of the upper/lower functionality, we need to map UTF8 to - * UTF16, which for some reason mbstowcs and wcstombs won't do for us. - * This conversion layer takes care of it. - */ - -#ifdef WIN32 - -/* texttowcs for the case of UTF8 to UTF16 */ -static wchar_t * -win32_utf8_texttowcs(const text *txt) -{ - int nbytes = VARSIZE_ANY_EXHDR(txt); - wchar_t *result; - int r; - - /* Overflow paranoia */ - if (nbytes < 0 || - nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* Output workspace cannot have more codes than input bytes */ - result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t)); - - /* stupid Microsloth API does not work for zero-length input */ - if (nbytes == 0) - r = 0; - else - { - /* Do the conversion */ - r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes, - result, nbytes); - - if (r <= 0) /* assume it's NO_UNICODE_TRANSLATION */ - { - /* see notes above about error reporting */ - pg_verifymbstr(VARDATA_ANY(txt), nbytes, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - } - - /* Append trailing null wchar (MultiByteToWideChar won't have) */ - Assert(r <= nbytes); - result[r] = 0; - - return result; -} - -/* wcstotext for the case of UTF16 to UTF8 */ -static text * -win32_utf8_wcstotext(const wchar_t *str) -{ - text *result; - int nbytes; - int r; - - /* Compute size of output string (this *will* include trailing null) */ - nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL); - if (nbytes <= 0) /* shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("UTF-16 to UTF-8 translation failed: %lu", - GetLastError()))); - - result = palloc(nbytes + VARHDRSZ); - - r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes, - NULL, NULL); - if (r != nbytes) /* shouldn't happen */ - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("UTF-16 to UTF-8 translation failed: %lu", - GetLastError()))); - - SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */ - - return result; -} - -/* interface layer to check which encoding is in use */ - -static wchar_t * -win32_texttowcs(const text *txt) -{ - if (GetDatabaseEncoding() == PG_UTF8) - return win32_utf8_texttowcs(txt); - else - return texttowcs(txt); -} - -static text * -win32_wcstotext(const wchar_t *str, int ncodes) -{ - if (GetDatabaseEncoding() == PG_UTF8) - return win32_utf8_wcstotext(str); - else - return wcstotext(str, ncodes); -} - -/* use macros to cause routines below to call interface layer */ - -#define texttowcs win32_texttowcs -#define wcstotext win32_wcstotext -#endif /* WIN32 */ - -#ifdef USE_WIDE_UPPER_LOWER -/* - * string_upper and string_lower are used for correct multibyte upper/lower - * transformations localized strings. Returns pointers to transformated - * string. - */ -char * -wstring_upper(char *str) -{ - wchar_t *workspace; - text *in_text; - text *out_text; - char *result; - int i; - - in_text = cstring_to_text(str); - workspace = texttowcs(in_text); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towupper(workspace[i]); - - out_text = wcstotext(workspace, i); - result = text_to_cstring(out_text); - - pfree(workspace); - pfree(in_text); - pfree(out_text); - - return result; -} - -char * -wstring_lower(char *str) -{ - wchar_t *workspace; - text *in_text; - text *out_text; - char *result; - int i; - - in_text = cstring_to_text(str); - workspace = texttowcs(in_text); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towlower(workspace[i]); - - out_text = wcstotext(workspace, i); - result = text_to_cstring(out_text); - - pfree(workspace); - pfree(in_text); - pfree(out_text); - - return result; -} -#endif /* USE_WIDE_UPPER_LOWER */ - /******************************************************************** * * lower @@ -332,52 +56,15 @@ wstring_lower(char *str) Datum lower(PG_FUNCTION_ARGS) { -#ifdef USE_WIDE_UPPER_LOWER + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; - /* - * Use wide char code only when max encoding length > 1 and ctype != C. - * Some operating systems fail with multi-byte encodings and a C locale. - * Also, for a C locale there is no need to process as multibyte. - */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - { - text *string = PG_GETARG_TEXT_PP(0); - text *result; - wchar_t *workspace; - int i; + out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); + result = cstring_to_text(out_string); + pfree(out_string); - workspace = texttowcs(string); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towlower(workspace[i]); - - result = wcstotext(workspace, i); - - pfree(workspace); - - PG_RETURN_TEXT_P(result); - } - else -#endif /* USE_WIDE_UPPER_LOWER */ - { - text *string = PG_GETARG_TEXT_P_COPY(0); - char *ptr; - int m; - - /* - * Since we copied the string, we can scribble directly on the value - */ - ptr = VARDATA(string); - m = VARSIZE(string) - VARHDRSZ; - - while (m-- > 0) - { - *ptr = tolower((unsigned char) *ptr); - ptr++; - } - - PG_RETURN_TEXT_P(string); - } + PG_RETURN_TEXT_P(result); } @@ -398,52 +85,15 @@ lower(PG_FUNCTION_ARGS) Datum upper(PG_FUNCTION_ARGS) { -#ifdef USE_WIDE_UPPER_LOWER + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; - /* - * Use wide char code only when max encoding length > 1 and ctype != C. - * Some operating systems fail with multi-byte encodings and a C locale. - * Also, for a C locale there is no need to process as multibyte. - */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - { - text *string = PG_GETARG_TEXT_PP(0); - text *result; - wchar_t *workspace; - int i; + out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); + result = cstring_to_text(out_string); + pfree(out_string); - workspace = texttowcs(string); - - for (i = 0; workspace[i] != 0; i++) - workspace[i] = towupper(workspace[i]); - - result = wcstotext(workspace, i); - - pfree(workspace); - - PG_RETURN_TEXT_P(result); - } - else -#endif /* USE_WIDE_UPPER_LOWER */ - { - text *string = PG_GETARG_TEXT_P_COPY(0); - char *ptr; - int m; - - /* - * Since we copied the string, we can scribble directly on the value - */ - ptr = VARDATA(string); - m = VARSIZE(string) - VARHDRSZ; - - while (m-- > 0) - { - *ptr = toupper((unsigned char) *ptr); - ptr++; - } - - PG_RETURN_TEXT_P(string); - } + PG_RETURN_TEXT_P(result); } @@ -467,64 +117,15 @@ upper(PG_FUNCTION_ARGS) Datum initcap(PG_FUNCTION_ARGS) { -#ifdef USE_WIDE_UPPER_LOWER + text *in_string = PG_GETARG_TEXT_PP(0); + char *out_string; + text *result; - /* - * Use wide char code only when max encoding length > 1 and ctype != C. - * Some operating systems fail with multi-byte encodings and a C locale. - * Also, for a C locale there is no need to process as multibyte. - */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) - { - text *string = PG_GETARG_TEXT_PP(0); - text *result; - wchar_t *workspace; - int wasalnum = 0; - int i; + out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string)); + result = cstring_to_text(out_string); + pfree(out_string); - workspace = texttowcs(string); - - for (i = 0; workspace[i] != 0; i++) - { - if (wasalnum) - workspace[i] = towlower(workspace[i]); - else - workspace[i] = towupper(workspace[i]); - wasalnum = iswalnum(workspace[i]); - } - - result = wcstotext(workspace, i); - - pfree(workspace); - - PG_RETURN_TEXT_P(result); - } - else -#endif /* USE_WIDE_UPPER_LOWER */ - { - text *string = PG_GETARG_TEXT_P_COPY(0); - int wasalnum = 0; - char *ptr; - int m; - - /* - * Since we copied the string, we can scribble directly on the value - */ - ptr = VARDATA(string); - m = VARSIZE(string) - VARHDRSZ; - - while (m-- > 0) - { - if (wasalnum) - *ptr = tolower((unsigned char) *ptr); - else - *ptr = toupper((unsigned char) *ptr); - wasalnum = isalnum((unsigned char) *ptr); - ptr++; - } - - PG_RETURN_TEXT_P(string); - } + PG_RETURN_TEXT_P(result); } diff --git a/src/include/utils/formatting.h b/src/include/utils/formatting.h index 2474afbf73..0674adfa8a 100644 --- a/src/include/utils/formatting.h +++ b/src/include/utils/formatting.h @@ -2,7 +2,7 @@ /* ----------------------------------------------------------------------- * formatting.h * - * $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.18 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.19 2008/06/23 19:27:19 momjian Exp $ * * * Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group @@ -21,6 +21,10 @@ #include "fmgr.h" +extern char *str_tolower(char *buff, size_t nbytes); +extern char *str_toupper(char *buff, size_t nbytes); +extern char *str_initcap(char *buff, size_t nbytes); + extern Datum timestamp_to_char(PG_FUNCTION_ARGS); extern Datum timestamptz_to_char(PG_FUNCTION_ARGS); extern Datum interval_to_char(PG_FUNCTION_ARGS);