From b6159202c99d4021fb078cede90b26f94883143d Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 23 Jun 2017 12:22:06 -0400 Subject: [PATCH] Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us --- src/backend/commands/collationcmds.c | 15 +++++++++--- src/backend/utils/adt/formatting.c | 3 +++ src/backend/utils/adt/pg_locale.c | 35 ++++++++++++++++++++++++---- src/backend/utils/adt/varlena.c | 16 +++++++++++-- src/include/utils/pg_locale.h | 2 +- 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index bdfd73906b..1a62b3e30a 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -381,6 +381,10 @@ normalize_libc_locale_name(char *new, const char *old) #ifdef USE_ICU +/* + * Get the ICU language tag for a locale name. + * The result is a palloc'd string. + */ static char * get_icu_language_tag(const char *localename) { @@ -397,7 +401,10 @@ get_icu_language_tag(const char *localename) return pstrdup(buf); } - +/* + * Get a comment (specifically, the display name) for an ICU locale. + * The result is a palloc'd string. + */ static char * get_icu_locale_comment(const char *localename) { @@ -407,10 +414,12 @@ get_icu_locale_comment(const char *localename) char *result; status = U_ZERO_ERROR; - len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status); + len_uchar = uloc_getDisplayName(localename, "en", + &displayname[0], sizeof(displayname), + &status); if (U_FAILURE(status)) ereport(ERROR, - (errmsg("could get display name for locale \"%s\": %s", + (errmsg("could not get display name for locale \"%s\": %s", localename, u_errorName(status)))); icu_from_uchar(&result, displayname, len_uchar); diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 31f04ff5ed..46f45f6654 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1561,6 +1561,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) len_conv = icu_convert_case(u_strToLower, mylocale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); } else #endif @@ -1684,6 +1685,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) len_conv = icu_convert_case(u_strToUpper, mylocale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); } else #endif @@ -1808,6 +1810,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); + pfree(buff_uchar); } else #endif diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index ad3fe74bbd..0f5ec954c3 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1486,6 +1486,18 @@ init_icu_converter(void) icu_converter = conv; } +/* + * Convert a string in the database encoding into a string of UChars. + * + * The source string at buff is of length nbytes + * (it needn't be nul-terminated) + * + * *buff_uchar receives a pointer to the palloc'd result string, and + * the function's result is the number of UChars generated. + * + * The result string is nul-terminated, though most callers rely on the + * result length instead. + */ int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) { @@ -1494,18 +1506,30 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes) init_icu_converter(); - len_uchar = 2 * nbytes; /* max length per docs */ + len_uchar = 2 * nbytes + 1; /* max length per docs */ *buff_uchar = palloc(len_uchar * sizeof(**buff_uchar)); status = U_ZERO_ERROR; - len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status); + len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, + buff, nbytes, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("ucnv_toUChars failed: %s", u_errorName(status)))); return len_uchar; } +/* + * Convert a string of UChars into the database encoding. + * + * The source string at buff_uchar is of length len_uchar + * (it needn't be nul-terminated) + * + * *result receives a pointer to the palloc'd result string, and the + * function's result is the number of bytes generated (not counting nul). + * + * The result string is nul-terminated. + */ int32_t -icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar) +icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar) { UErrorCode status; int32_t len_result; @@ -1515,13 +1539,14 @@ icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar) len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter)); *result = palloc(len_result + 1); status = U_ZERO_ERROR; - ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status); + len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1, + buff_uchar, len_uchar, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("ucnv_fromUChars failed: %s", u_errorName(status)))); return len_result; } -#endif +#endif /* USE_ICU */ /* * These functions convert from/to libc's wchar_t, *not* pg_wchar_t. diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2407394b06..ebfb823fb8 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1569,6 +1569,9 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid) result = ucol_strcoll(mylocale->info.icu.ucol, uchar1, ulen1, uchar2, ulen2); + + pfree(uchar1); + pfree(uchar2); } #else /* not USE_ICU */ /* shouldn't happen */ @@ -2155,6 +2158,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup) result = ucol_strcoll(sss->locale->info.icu.ucol, uchar1, ulen1, uchar2, ulen2); + + pfree(uchar1); + pfree(uchar2); } #else /* not USE_ICU */ /* shouldn't happen */ @@ -2279,7 +2285,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) Size bsize; #ifdef USE_ICU int32_t ulen = -1; - UChar *uchar; + UChar *uchar = NULL; #endif /* @@ -2354,7 +2360,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) &status); if (U_FAILURE(status)) ereport(ERROR, - (errmsg("sort key generation failed: %s", u_errorName(status)))); + (errmsg("sort key generation failed: %s", + u_errorName(status)))); } else bsize = ucol_getSortKey(sss->locale->info.icu.ucol, @@ -2394,6 +2401,11 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) * okay. See remarks on bytea case above.) */ memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize)); + +#ifdef USE_ICU + if (uchar) + pfree(uchar); +#endif } /* diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 5e3711f008..a02d27ba26 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -93,7 +93,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol #ifdef USE_ICU extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes); -extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar); +extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar); #endif /* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */