/*-------------------------------------------------------------------------
 * oracle_compat.c
 *	Oracle compatible functions.
 *
 * Copyright (c) 1996-2007, PostgreSQL Global Development Group
 *
 *	Author: Edmund Mergl
 *	Multibyte enhancement: Tatsuo Ishii
 *
 *
 * IDENTIFICATION
 *	$PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.70 2007/02/27 23:48:08 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <ctype.h>
#include <limits.h>
/*
 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
 * declare them in <wchar.h>.
 */
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif

#include "utils/builtins.h"
#include "utils/pg_locale.h"
#include "mb/pg_wchar.h"


/*
 * If the system provides the needed functions for wide-character manipulation
 * (which are all standardized by C99), then we implement upper/lower/initcap
 * using wide-character functions.  Otherwise we use the traditional <ctype.h>
 * functions, which of course will not work as desired in multibyte character
 * sets.  Note that in either case we are effectively assuming that the
 * database character encoding matches the encoding implied by LC_CTYPE.
 *
 * We assume if we have these two functions, we have their friends too, and
 * can use the wide-character method.
 */
#if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
#define USE_WIDE_UPPER_LOWER
char *wstring_lower(char *str);
char *wstring_upper(char *str);
#endif

static text *dotrim(const char *string, int stringlen,
	   const char *set, int setlen,
	   bool doltrim, bool dortrim);


#ifdef USE_WIDE_UPPER_LOWER

/*
 * Convert a TEXT value into a palloc'd wchar string.
*/
static wchar_t *
texttowcs(const text *txt)
{
	int			nbytes = VARSIZE(txt) - VARHDRSZ;
	char	   *workstr;
	wchar_t    *result;
	size_t		ncodes;

	/* Overflow paranoia */
	if (nbytes < 0 ||
		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Need a null-terminated version of the input */
	workstr = (char *) palloc(nbytes + 1);
	memcpy(workstr, VARDATA(txt), nbytes);
	workstr[nbytes] = '\0';

	/* Output workspace cannot have more codes than input bytes */
	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

	/* Do the conversion */
	ncodes = mbstowcs(result, workstr, nbytes + 1);

	if (ncodes == (size_t) -1)
	{
		/*
		 * Invalid multibyte character encountered.  We try to give a useful
		 * error message by letting pg_verifymbstr check the string.  But it's
		 * possible that the string is OK to us, and not OK to mbstowcs ---
		 * this suggests that the LC_CTYPE locale is different from the
		 * database encoding.  Give a generic error message if verifymbstr
		 * can't find anything wrong.
		 */
		pg_verifymbstr(workstr, nbytes, false);
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	Assert(ncodes <= (size_t) nbytes);

	/*
	 * The null-terminated copy was only needed as input to mbstowcs and for
	 * the error report above; release it so that only the returned wchar
	 * string remains allocated.
	 */
	pfree(workstr);

	return result;
}

/*
 * Convert a wchar string into a palloc'd TEXT value.  The wchar string
 * must be zero-terminated, but we also require the caller to pass the string
 * length, since it will know it anyway in current uses.
*/ static text * wcstotext(const wchar_t *str, int ncodes) { text *result; size_t nbytes; /* Overflow paranoia */ if (ncodes < 0 || ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); /* Make workspace certainly large enough for result */ result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ); /* Do the conversion */ nbytes = wcstombs((char *) VARDATA(result), str, (ncodes + 1) * MB_CUR_MAX); if (nbytes == (size_t) -1) { /* Invalid multibyte character encountered ... shouldn't happen */ ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("invalid multibyte character for locale"))); } Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX)); SET_VARSIZE(result, nbytes + VARHDRSZ); return result; } #endif /* USE_WIDE_UPPER_LOWER */ /* * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding. * To make use of the upper/lower functionality, we need to map UTF8 to * UTF16, which for some reason mbstowcs and wcstombs won't do for us. * This conversion layer takes care of it. 
*/
#ifdef WIN32

/*
 * texttowcs for the case of UTF8 to UTF16
 *
 * Returns a palloc'd, zero-terminated wchar_t array holding the converted
 * contents of txt.  Reports an error if the conversion call returns zero
 * for non-empty input.
 */
static wchar_t *
win32_utf8_texttowcs(const text *txt)
{
	int			nbytes = VARSIZE(txt) - VARHDRSZ;
	wchar_t    *result;
	int			r;

	/* Overflow paranoia */
	if (nbytes < 0 ||
		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	/* Output workspace cannot have more codes than input bytes */
	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

	/* The conversion API does not work for zero-length input */
	if (nbytes == 0)
		r = 0;
	else
	{
		/* Do the conversion */
		r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes,
								result, nbytes);

		if (!r)					/* assume it's NO_UNICODE_TRANSLATION */
		{
			/*
			 * Let pg_verifymbstr report a specific problem if it finds one;
			 * otherwise fall through to the generic message.
			 */
			pg_verifymbstr(VARDATA(txt), nbytes, false);
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("invalid multibyte character for locale"),
					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
		}
	}

	/* r is the number of wide codes written; terminate the array there */
	Assert(r <= nbytes);
	result[r] = 0;

	return result;
}

/*
 * wcstotext for the case of UTF16 to UTF8
 *
 * str must be zero-terminated (it is passed with length -1).  Returns a
 * palloc'd text value whose length excludes the terminator.
 */
static text *
win32_utf8_wcstotext(const wchar_t *str)
{
	text	   *result;
	int			nbytes;
	int			r;

	/* First call has no output buffer; its result is used as the size needed */
	nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
	if (nbytes == 0)			/* shouldn't happen */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
						GetLastError())));

	result = palloc(nbytes + VARHDRSZ);

	/* Second call performs the conversion into the data area */
	r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
							NULL, NULL);
	if (r == 0)					/* shouldn't happen */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
						GetLastError())));

	SET_VARSIZE(result, nbytes + VARHDRSZ - 1);		/* -1 to ignore null */

	return result;
}

/*
 * Interface layer to check which encoding is in use: UTF8 databases go
 * through the UTF8/UTF16 converters above, everything else through the
 * generic texttowcs/wcstotext.
 */

static wchar_t *
win32_texttowcs(const text *txt)
{
	if (GetDatabaseEncoding() == PG_UTF8)
		return win32_utf8_texttowcs(txt);
	else
		return texttowcs(txt);
}

static text *
win32_wcstotext(const wchar_t *str, int ncodes)
{
	/* ncodes is only needed by the generic converter */
	if (GetDatabaseEncoding() == PG_UTF8)
		return win32_utf8_wcstotext(str);
	else
		return wcstotext(str, ncodes);
}

/* use macros to cause routines below to call interface layer */
#define texttowcs	win32_texttowcs
#define wcstotext	win32_wcstotext

#endif   /* WIN32 */

#ifdef USE_WIDE_UPPER_LOWER
/*
 * wstring_upper and wstring_lower perform multibyte-aware upper/lower case
 * transformation of null-terminated C strings.  Each returns a pointer to a
 * newly palloc'd, null-terminated transformed string; the input is not
 * modified.
 */
char *
wstring_upper(char *str)
{
	wchar_t    *workspace;
	text	   *in_text;
	text	   *out_text;
	char	   *result;
	int			nbytes = strlen(str);
	int			i;

	/* Wrap the C string in a temporary text value for texttowcs */
	in_text = palloc(nbytes + VARHDRSZ);
	memcpy(VARDATA(in_text), str, nbytes);
	SET_VARSIZE(in_text, nbytes + VARHDRSZ);

	workspace = texttowcs(in_text);

	/* Convert each wide code in place; i ends up as the code count */
	for (i = 0; workspace[i] != 0; i++)
		workspace[i] = towupper(workspace[i]);

	out_text = wcstotext(workspace, i);

	/* Copy the result back out as a null-terminated C string */
	nbytes = VARSIZE(out_text) - VARHDRSZ;
	result = palloc(nbytes + 1);
	memcpy(result, VARDATA(out_text), nbytes);

	result[nbytes] = '\0';

	/* Release all temporaries; only result is handed to the caller */
	pfree(workspace);
	pfree(in_text);
	pfree(out_text);

	return result;
}

char *
wstring_lower(char *str)
{
	wchar_t    *workspace;
	text	   *in_text;
	text	   *out_text;
	char	   *result;
	int			nbytes = strlen(str);
	int			i;

	/* Wrap the C string in a temporary text value for texttowcs */
	in_text = palloc(nbytes + VARHDRSZ);
	memcpy(VARDATA(in_text), str, nbytes);
	SET_VARSIZE(in_text, nbytes + VARHDRSZ);

	workspace = texttowcs(in_text);

	/* Convert each wide code in place; i ends up as the code count */
	for (i = 0; workspace[i] != 0; i++)
		workspace[i] = towlower(workspace[i]);

	out_text = wcstotext(workspace, i);

	/* Copy the result back out as a null-terminated C string */
	nbytes = VARSIZE(out_text) - VARHDRSZ;
	result = palloc(nbytes + 1);
	memcpy(result, VARDATA(out_text), nbytes);

	result[nbytes] = '\0';

	/* Release all temporaries; only result is handed to the caller */
	pfree(workspace);
	pfree(in_text);
	pfree(out_text);

	return result;
}
#endif   /* USE_WIDE_UPPER_LOWER */

/********************************************************************
 *
 * lower
 *
 * Syntax:
 *
 *	 text lower(text string)
 *
 * Purpose:
 *
 *	 Returns string, with all letters forced to lowercase.
* ********************************************************************/ Datum lower(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. */ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; wchar_t *workspace; int i; workspace = texttowcs(string); for (i = 0; workspace[i] != 0; i++) workspace[i] = towlower(workspace[i]); result = wcstotext(workspace, i); pfree(workspace); PG_RETURN_TEXT_P(result); } else #endif /* USE_WIDE_UPPER_LOWER */ { text *string = PG_GETARG_TEXT_P_COPY(0); char *ptr; int m; /* * Since we copied the string, we can scribble directly on the value */ ptr = VARDATA(string); m = VARSIZE(string) - VARHDRSZ; while (m-- > 0) { *ptr = tolower((unsigned char) *ptr); ptr++; } PG_RETURN_TEXT_P(string); } } /******************************************************************** * * upper * * Syntax: * * text upper(text string) * * Purpose: * * Returns string, with all letters forced to uppercase. * ********************************************************************/ Datum upper(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. 
*/ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; wchar_t *workspace; int i; workspace = texttowcs(string); for (i = 0; workspace[i] != 0; i++) workspace[i] = towupper(workspace[i]); result = wcstotext(workspace, i); pfree(workspace); PG_RETURN_TEXT_P(result); } else #endif /* USE_WIDE_UPPER_LOWER */ { text *string = PG_GETARG_TEXT_P_COPY(0); char *ptr; int m; /* * Since we copied the string, we can scribble directly on the value */ ptr = VARDATA(string); m = VARSIZE(string) - VARHDRSZ; while (m-- > 0) { *ptr = toupper((unsigned char) *ptr); ptr++; } PG_RETURN_TEXT_P(string); } } /******************************************************************** * * initcap * * Syntax: * * text initcap(text string) * * Purpose: * * Returns string, with first letter of each word in uppercase, all * other letters in lowercase. A word is defined as a sequence of * alphanumeric characters, delimited by non-alphanumeric * characters. * ********************************************************************/ Datum initcap(PG_FUNCTION_ARGS) { #ifdef USE_WIDE_UPPER_LOWER /* * Use wide char code only when max encoding length > 1 and ctype != C. * Some operating systems fail with multi-byte encodings and a C locale. * Also, for a C locale there is no need to process as multibyte. 
*/ if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c()) { text *string = PG_GETARG_TEXT_P(0); text *result; wchar_t *workspace; int wasalnum = 0; int i; workspace = texttowcs(string); for (i = 0; workspace[i] != 0; i++) { if (wasalnum) workspace[i] = towlower(workspace[i]); else workspace[i] = towupper(workspace[i]); wasalnum = iswalnum(workspace[i]); } result = wcstotext(workspace, i); pfree(workspace); PG_RETURN_TEXT_P(result); } else #endif /* USE_WIDE_UPPER_LOWER */ { text *string = PG_GETARG_TEXT_P_COPY(0); int wasalnum = 0; char *ptr; int m; /* * Since we copied the string, we can scribble directly on the value */ ptr = VARDATA(string); m = VARSIZE(string) - VARHDRSZ; while (m-- > 0) { if (wasalnum) *ptr = tolower((unsigned char) *ptr); else *ptr = toupper((unsigned char) *ptr); wasalnum = isalnum((unsigned char) *ptr); ptr++; } PG_RETURN_TEXT_P(string); } } /******************************************************************** * * lpad * * Syntax: * * text lpad(text string1, int4 len, text string2) * * Purpose: * * Returns string1, left-padded to length len with the sequence of * characters in string2. If len is less than the length of string1, * instead truncate (on the right) to len. 
* ********************************************************************/
Datum
lpad(PG_FUNCTION_ARGS)
{
	text	   *string1 = PG_GETARG_TEXT_P(0);
	int32		len = PG_GETARG_INT32(1);
	text	   *string2 = PG_GETARG_TEXT_P(2);
	text	   *ret;
	char	   *ptr1,
			   *ptr2,
			   *ptr2end,
			   *ptr_ret;
	int			m,
				s1len,
				s2len;
	int			maxmblen;
	int			bytelen;

	/* Negative len is silently taken as zero */
	if (len < 0)
		len = 0;

	s1len = VARSIZE(string1) - VARHDRSZ;
	if (s1len < 0)
		s1len = 0;				/* shouldn't happen */

	s2len = VARSIZE(string2) - VARHDRSZ;
	if (s2len < 0)
		s2len = 0;				/* shouldn't happen */

	/* From here on s1len counts characters, not bytes */
	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);

	if (s1len > len)
		s1len = len;			/* truncate string1 to len chars */

	if (s2len <= 0)
		len = s1len;			/* nothing to pad with, so don't pad */

	/*
	 * Worst case is len characters of maximum encoded width, plus the
	 * header.  Test against the limit BEFORE multiplying: signed overflow
	 * is undefined, so it cannot be detected reliably after the fact.
	 */
	maxmblen = pg_database_encoding_max_length();
	if (len > (INT_MAX - VARHDRSZ) / maxmblen)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("requested length too large")));

	bytelen = maxmblen * len;

	ret = (text *) palloc(VARHDRSZ + bytelen);

	/* m = number of padding characters to emit before string1 */
	m = len - s1len;

	ptr2 = VARDATA(string2);
	ptr2end = ptr2 + s2len;
	ptr_ret = VARDATA(ret);

	while (m--)
	{
		int			mlen = pg_mblen(ptr2);

		memcpy(ptr_ret, ptr2, mlen);
		ptr_ret += mlen;
		ptr2 += mlen;
		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
			ptr2 = VARDATA(string2);
	}

	/* Now append the (possibly truncated) string1, character by character */
	ptr1 = VARDATA(string1);

	while (s1len--)
	{
		int			mlen = pg_mblen(ptr1);

		memcpy(ptr_ret, ptr1, mlen);
		ptr_ret += mlen;
		ptr1 += mlen;
	}

	/* Actual size is however far the output pointer advanced */
	SET_VARSIZE(ret, ptr_ret - (char *) ret);

	PG_RETURN_TEXT_P(ret);
}


/********************************************************************
 *
 * rpad
 *
 * Syntax:
 *
 *	 text rpad(text string1, int4 len, text string2)
 *
 * Purpose:
 *
 *	 Returns string1, right-padded to length len with the sequence of
 *	 characters in string2.  If len is less than the length of string1,
 *	 instead truncate (on the right) to len.
* ********************************************************************/
Datum
rpad(PG_FUNCTION_ARGS)
{
	text	   *string1 = PG_GETARG_TEXT_P(0);
	int32		len = PG_GETARG_INT32(1);
	text	   *string2 = PG_GETARG_TEXT_P(2);
	text	   *ret;
	char	   *ptr1,
			   *ptr2,
			   *ptr2end,
			   *ptr_ret;
	int			m,
				s1len,
				s2len;
	int			maxmblen;
	int			bytelen;

	/* Negative len is silently taken as zero */
	if (len < 0)
		len = 0;

	s1len = VARSIZE(string1) - VARHDRSZ;
	if (s1len < 0)
		s1len = 0;				/* shouldn't happen */

	s2len = VARSIZE(string2) - VARHDRSZ;
	if (s2len < 0)
		s2len = 0;				/* shouldn't happen */

	/* From here on s1len counts characters, not bytes */
	s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);

	if (s1len > len)
		s1len = len;			/* truncate string1 to len chars */

	if (s2len <= 0)
		len = s1len;			/* nothing to pad with, so don't pad */

	/*
	 * Worst case is len characters of maximum encoded width, plus the
	 * header.  Test against the limit BEFORE multiplying: signed overflow
	 * is undefined, so it cannot be detected reliably after the fact.
	 */
	maxmblen = pg_database_encoding_max_length();
	if (len > (INT_MAX - VARHDRSZ) / maxmblen)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("requested length too large")));

	bytelen = maxmblen * len;

	ret = (text *) palloc(VARHDRSZ + bytelen);

	/* m = number of padding characters to emit after string1 */
	m = len - s1len;

	/* Copy the (possibly truncated) string1 first, character by character */
	ptr1 = VARDATA(string1);
	ptr_ret = VARDATA(ret);

	while (s1len--)
	{
		int			mlen = pg_mblen(ptr1);

		memcpy(ptr_ret, ptr1, mlen);
		ptr_ret += mlen;
		ptr1 += mlen;
	}

	ptr2 = VARDATA(string2);
	ptr2end = ptr2 + s2len;

	while (m--)
	{
		int			mlen = pg_mblen(ptr2);

		memcpy(ptr_ret, ptr2, mlen);
		ptr_ret += mlen;
		ptr2 += mlen;
		if (ptr2 == ptr2end)	/* wrap around at end of s2 */
			ptr2 = VARDATA(string2);
	}

	/* Actual size is however far the output pointer advanced */
	SET_VARSIZE(ret, ptr_ret - (char *) ret);

	PG_RETURN_TEXT_P(ret);
}


/********************************************************************
 *
 * btrim
 *
 * Syntax:
 *
 *	 text btrim(text string, text set)
 *
 * Purpose:
 *
 *	 Returns string with characters removed from the front and back
 *	 up to the first character not in set.
* ********************************************************************/
Datum
btrim(PG_FUNCTION_ARGS)
{
	text	   *string = PG_GETARG_TEXT_P(0);
	text	   *set = PG_GETARG_TEXT_P(1);
	text	   *ret;

	/* Trim at both ends (doltrim and dortrim both true) */
	ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
				 VARDATA(set), VARSIZE(set) - VARHDRSZ,
				 true, true);

	PG_RETURN_TEXT_P(ret);
}

/********************************************************************
 *
 * btrim1 --- btrim with set fixed as ' '
 *
 ********************************************************************/
Datum
btrim1(PG_FUNCTION_ARGS)
{
	text	   *string = PG_GETARG_TEXT_P(0);
	text	   *ret;

	ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
				 " ", 1,
				 true, true);

	PG_RETURN_TEXT_P(ret);
}

/*
 * Common implementation for btrim, ltrim, rtrim
 *
 * string/stringlen and set/setlen are byte buffers with explicit lengths
 * (no terminator is consulted).  doltrim requests trimming at the start,
 * dortrim at the end.  Returns a newly palloc'd text holding the bytes of
 * string that remain; the inputs are not modified.
 */
static text *
dotrim(const char *string, int stringlen,
	   const char *set, int setlen,
	   bool doltrim, bool dortrim)
{
	text	   *result;
	int			i;

	/* Nothing to do if either string or set is empty */
	if (stringlen > 0 && setlen > 0)
	{
		if (pg_database_encoding_max_length() > 1)
		{
			/*
			 * In the multibyte-encoding case, build arrays of pointers to
			 * character starts, so that we can avoid inefficient checks in
			 * the inner loops.
			 */
			const char **stringchars;
			const char **setchars;
			int		   *stringmblen;
			int		   *setmblen;
			int			stringnchars;
			int			setnchars;
			int			resultndx;
			int			resultnchars;
			const char *p;
			int			len;
			int			mblen;
			const char *str_pos;
			int			str_len;

			/*
			 * Arrays are sized by byte count, an upper bound on the number
			 * of characters.
			 */
			stringchars = (const char **) palloc(stringlen * sizeof(char *));
			stringmblen = (int *) palloc(stringlen * sizeof(int));
			stringnchars = 0;
			p = string;
			len = stringlen;
			/* Record the start and byte length of each character of string */
			while (len > 0)
			{
				stringchars[stringnchars] = p;
				stringmblen[stringnchars] = mblen = pg_mblen(p);
				stringnchars++;
				p += mblen;
				len -= mblen;
			}

			setchars = (const char **) palloc(setlen * sizeof(char *));
			setmblen = (int *) palloc(setlen * sizeof(int));
			setnchars = 0;
			p = set;
			len = setlen;
			/* Same decomposition for the set */
			while (len > 0)
			{
				setchars[setnchars] = p;
				setmblen[setnchars] = mblen = pg_mblen(p);
				setnchars++;
				p += mblen;
				len -= mblen;
			}

			resultndx = 0;		/* index in stringchars[] */
			resultnchars = stringnchars;

			if (doltrim)
			{
				while (resultnchars > 0)
				{
					str_pos = stringchars[resultndx];
					str_len = stringmblen[resultndx];
					/* Is the current leading character a member of the set? */
					for (i = 0; i < setnchars; i++)
					{
						if (str_len == setmblen[i] &&
							memcmp(str_pos, setchars[i], str_len) == 0)
							break;
					}
					if (i >= setnchars)
						break;	/* no match here */
					/* Drop it: advance the start and shrink the byte count */
					string += str_len;
					stringlen -= str_len;
					resultndx++;
					resultnchars--;
				}
			}

			if (dortrim)
			{
				while (resultnchars > 0)
				{
					/* Last remaining character */
					str_pos = stringchars[resultndx + resultnchars - 1];
					str_len = stringmblen[resultndx + resultnchars - 1];
					for (i = 0; i < setnchars; i++)
					{
						if (str_len == setmblen[i] &&
							memcmp(str_pos, setchars[i], str_len) == 0)
							break;
					}
					if (i >= setnchars)
						break;	/* no match here */
					/* Drop it: only the byte count changes at this end */
					stringlen -= str_len;
					resultnchars--;
				}
			}

			pfree(stringchars);
			pfree(stringmblen);
			pfree(setchars);
			pfree(setmblen);
		}
		else
		{
			/*
			 * In the single-byte-encoding case, we don't need such overhead.
			 */
			if (doltrim)
			{
				while (stringlen > 0)
				{
					char		str_ch = *string;

					for (i = 0; i < setlen; i++)
					{
						if (str_ch == set[i])
							break;
					}
					if (i >= setlen)
						break;	/* no match here */
					string++;
					stringlen--;
				}
			}

			if (dortrim)
			{
				while (stringlen > 0)
				{
					char		str_ch = string[stringlen - 1];

					for (i = 0; i < setlen; i++)
					{
						if (str_ch == set[i])
							break;
					}
					if (i >= setlen)
						break;	/* no match here */
					stringlen--;
				}
			}
		}
	}

	/* Return selected portion of string */
	result = (text *) palloc(VARHDRSZ + stringlen);
	SET_VARSIZE(result, VARHDRSZ + stringlen);
	memcpy(VARDATA(result), string, stringlen);

	return result;
}

/********************************************************************
 *
 * byteatrim
 *
 * Syntax:
 *
 *	 bytea byteatrim(bytea string, bytea set)
 *
 * Purpose:
 *
 *	 Returns string with characters removed from the front and back
 *	 up to the first character not in set.
 *
 * Cloned from btrim and modified as required.
 ********************************************************************/

Datum
byteatrim(PG_FUNCTION_ARGS)
{
	bytea	   *string = PG_GETARG_BYTEA_P(0);
	bytea	   *set = PG_GETARG_BYTEA_P(1);
	bytea	   *ret;
	char	   *ptr,
			   *end,
			   *ptr2,
			   *end2;
	int			m;

	/* With an empty string or an empty set, the input is returned as-is */
	if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
		(VARSIZE(set) - VARHDRSZ) <= 0)
		PG_RETURN_BYTEA_P(string);

	/* ptr/end address the first/last byte of string; end2 the last of set */
	ptr = VARDATA(string);
	end = VARDATA(string) + VARSIZE(string) - VARHDRSZ - 1;
	end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;

	/* Trim from the front: m tracks how many bytes remain */
	while (m > 0)
	{
		ptr2 = VARDATA(set);
		while (ptr2 <= end2)
		{
			if (*ptr == *ptr2)
				break;
			++ptr2;
		}
		if (ptr2 > end2)
			break;				/* leading byte is not in set */
		ptr++;
		m--;
	}

	/* Trim from the back */
	while (m > 0)
	{
		ptr2 = VARDATA(set);
		while (ptr2 <= end2)
		{
			if (*end == *ptr2)
				break;
			++ptr2;
		}
		if (ptr2 > end2)
			break;				/* trailing byte is not in set */
		end--;
		m--;
	}

	/* Copy the m surviving bytes starting at ptr into a new value */
	ret = (bytea *) palloc(VARHDRSZ + m);
	SET_VARSIZE(ret, VARHDRSZ + m);
	memcpy(VARDATA(ret), ptr, m);

	PG_RETURN_BYTEA_P(ret);
}

/********************************************************************
 *
 * ltrim
 *
 * Syntax:
 *
 *	 text ltrim(text string, text set)
 *
 * Purpose:
 *
 *	 Returns string with initial characters removed up to the first
 *	 character not in set.
* ********************************************************************/ Datum ltrim(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); text *set = PG_GETARG_TEXT_P(1); text *ret; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ, VARDATA(set), VARSIZE(set) - VARHDRSZ, true, false); PG_RETURN_TEXT_P(ret); } /******************************************************************** * * ltrim1 --- ltrim with set fixed as ' ' * ********************************************************************/ Datum ltrim1(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); text *ret; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ, " ", 1, true, false); PG_RETURN_TEXT_P(ret); } /******************************************************************** * * rtrim * * Syntax: * * text rtrim(text string, text set) * * Purpose: * * Returns string with final characters removed after the last * character not in set. * ********************************************************************/ Datum rtrim(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); text *set = PG_GETARG_TEXT_P(1); text *ret; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ, VARDATA(set), VARSIZE(set) - VARHDRSZ, false, true); PG_RETURN_TEXT_P(ret); } /******************************************************************** * * rtrim1 --- rtrim with set fixed as ' ' * ********************************************************************/ Datum rtrim1(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); text *ret; ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ, " ", 1, false, true); PG_RETURN_TEXT_P(ret); } /******************************************************************** * * translate * * Syntax: * * text translate(text string, text from, text to) * * Purpose: * * Returns string after replacing all occurrences of characters in from * with the corresponding character in to. If from is longer than to, * occurrences of the extra characters in from are deleted. 
*	 Improved by Edwin Ramirez.
 *
 ********************************************************************/
Datum
translate(PG_FUNCTION_ARGS)
{
	text	   *string = PG_GETARG_TEXT_P(0);
	text	   *from = PG_GETARG_TEXT_P(1);
	text	   *to = PG_GETARG_TEXT_P(2);
	text	   *result;
	char	   *from_ptr,
			   *to_ptr,
			   *to_end;
	char	   *source,
			   *target;
	int			m,
				fromlen,
				tolen,
				retlen,
				i;
	int			maxmblen;
	int			worst_len;
	int			len;
	int			source_len;
	int			from_index;

	/* An empty input string is returned unchanged */
	if ((m = VARSIZE(string) - VARHDRSZ) <= 0)
		PG_RETURN_TEXT_P(string);

	fromlen = VARSIZE(from) - VARHDRSZ;
	from_ptr = VARDATA(from);
	tolen = VARSIZE(to) - VARHDRSZ;
	to_ptr = VARDATA(to);
	to_end = to_ptr + tolen;

	/*
	 * Size the result for the worst case: every source character is a single
	 * byte and is replaced by a character of maximum encoded width.  (The
	 * byte-length ratio of "to" and "from" is NOT a bound in a multibyte
	 * encoding, and is undefined when "from" is empty.)  Test against the
	 * limit before multiplying so the arithmetic cannot overflow.
	 */
	maxmblen = pg_database_encoding_max_length();
	if (m > (INT_MAX - VARHDRSZ) / maxmblen)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("requested length too large")));

	worst_len = maxmblen * m;

	result = (text *) palloc(worst_len + VARHDRSZ);

	source = VARDATA(string);
	target = VARDATA(result);
	retlen = 0;

	while (m > 0)
	{
		source_len = pg_mblen(source);
		from_index = 0;

		/* Find the character position of this source character in "from" */
		for (i = 0; i < fromlen; i += len)
		{
			len = pg_mblen(&from_ptr[i]);
			if (len == source_len &&
				memcmp(source, &from_ptr[i], len) == 0)
				break;

			from_index++;
		}
		if (i < fromlen)
		{
			/* substitute */
			char	   *p = to_ptr;

			/*
			 * Step to the from_index'th character of "to", checking the
			 * bound before each step so we never examine a byte past the
			 * end of "to" (in particular when "to" is empty).
			 */
			for (i = 0; i < from_index && p < to_end; i++)
				p += pg_mblen(p);

			/* If "to" has no such character, the source char is deleted */
			if (p < to_end)
			{
				len = pg_mblen(p);
				memcpy(target, p, len);
				target += len;
				retlen += len;
			}
		}
		else
		{
			/* no match, so copy */
			memcpy(target, source, source_len);
			target += source_len;
			retlen += source_len;
		}

		source += source_len;
		m -= source_len;
	}

	SET_VARSIZE(result, retlen + VARHDRSZ);

	/*
	 * The result is usually smaller than the worst-case allocation, but
	 * it's not worth reallocating it; the function result probably won't
	 * live long anyway.
	 */

	PG_RETURN_TEXT_P(result);
}

/********************************************************************
 *
 * ascii
 *
 * Syntax:
 *
 *	 int ascii(text string)
 *
 * Purpose:
 *
 *	 Returns the decimal representation of the first character from
 *	 string.
* ********************************************************************/ Datum ascii(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); if (VARSIZE(string) <= VARHDRSZ) PG_RETURN_INT32(0); PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string))); } /******************************************************************** * * chr * * Syntax: * * text chr(int val) * * Purpose: * * Returns the character having the binary equivalent to val * ********************************************************************/ Datum chr(PG_FUNCTION_ARGS) { int32 cvalue = PG_GETARG_INT32(0); text *result; result = (text *) palloc(VARHDRSZ + 1); SET_VARSIZE(result, VARHDRSZ + 1); *VARDATA(result) = (char) cvalue; PG_RETURN_TEXT_P(result); } /******************************************************************** * * repeat * * Syntax: * * text repeat(text string, int val) * * Purpose: * * Repeat string by val. * ********************************************************************/ Datum repeat(PG_FUNCTION_ARGS) { text *string = PG_GETARG_TEXT_P(0); int32 count = PG_GETARG_INT32(1); text *result; int slen, tlen; int i; char *cp; if (count < 0) count = 0; slen = (VARSIZE(string) - VARHDRSZ); tlen = (VARHDRSZ + (count * slen)); /* Check for integer overflow */ if (slen != 0 && count != 0) { int check = count * slen; int check2 = check + VARHDRSZ; if ((check / slen) != count || check2 <= check) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("requested length too large"))); } result = (text *) palloc(tlen); SET_VARSIZE(result, tlen); cp = VARDATA(result); for (i = 0; i < count; i++) { memcpy(cp, VARDATA(string), slen); cp += slen; } PG_RETURN_TEXT_P(result); }