Fix memory leakage in ICU encoding conversion, and other code review.
Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
This commit is contained in:
parent
8be8510cf8
commit
b6159202c9
|
@ -381,6 +381,10 @@ normalize_libc_locale_name(char *new, const char *old)
|
||||||
|
|
||||||
|
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
|
/*
|
||||||
|
* Get the ICU language tag for a locale name.
|
||||||
|
* The result is a palloc'd string.
|
||||||
|
*/
|
||||||
static char *
|
static char *
|
||||||
get_icu_language_tag(const char *localename)
|
get_icu_language_tag(const char *localename)
|
||||||
{
|
{
|
||||||
|
@ -397,7 +401,10 @@ get_icu_language_tag(const char *localename)
|
||||||
return pstrdup(buf);
|
return pstrdup(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get a comment (specifically, the display name) for an ICU locale.
|
||||||
|
* The result is a palloc'd string.
|
||||||
|
*/
|
||||||
static char *
|
static char *
|
||||||
get_icu_locale_comment(const char *localename)
|
get_icu_locale_comment(const char *localename)
|
||||||
{
|
{
|
||||||
|
@ -407,10 +414,12 @@ get_icu_locale_comment(const char *localename)
|
||||||
char *result;
|
char *result;
|
||||||
|
|
||||||
status = U_ZERO_ERROR;
|
status = U_ZERO_ERROR;
|
||||||
len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
|
len_uchar = uloc_getDisplayName(localename, "en",
|
||||||
|
&displayname[0], sizeof(displayname),
|
||||||
|
&status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errmsg("could get display name for locale \"%s\": %s",
|
(errmsg("could not get display name for locale \"%s\": %s",
|
||||||
localename, u_errorName(status))));
|
localename, u_errorName(status))));
|
||||||
|
|
||||||
icu_from_uchar(&result, displayname, len_uchar);
|
icu_from_uchar(&result, displayname, len_uchar);
|
||||||
|
|
|
@ -1561,6 +1561,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
|
||||||
len_conv = icu_convert_case(u_strToLower, mylocale,
|
len_conv = icu_convert_case(u_strToLower, mylocale,
|
||||||
&buff_conv, buff_uchar, len_uchar);
|
&buff_conv, buff_uchar, len_uchar);
|
||||||
icu_from_uchar(&result, buff_conv, len_conv);
|
icu_from_uchar(&result, buff_conv, len_conv);
|
||||||
|
pfree(buff_uchar);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -1684,6 +1685,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
|
||||||
len_conv = icu_convert_case(u_strToUpper, mylocale,
|
len_conv = icu_convert_case(u_strToUpper, mylocale,
|
||||||
&buff_conv, buff_uchar, len_uchar);
|
&buff_conv, buff_uchar, len_uchar);
|
||||||
icu_from_uchar(&result, buff_conv, len_conv);
|
icu_from_uchar(&result, buff_conv, len_conv);
|
||||||
|
pfree(buff_uchar);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
@ -1808,6 +1810,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
|
||||||
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
|
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
|
||||||
&buff_conv, buff_uchar, len_uchar);
|
&buff_conv, buff_uchar, len_uchar);
|
||||||
icu_from_uchar(&result, buff_conv, len_conv);
|
icu_from_uchar(&result, buff_conv, len_conv);
|
||||||
|
pfree(buff_uchar);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1486,6 +1486,18 @@ init_icu_converter(void)
|
||||||
icu_converter = conv;
|
icu_converter = conv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert a string in the database encoding into a string of UChars.
|
||||||
|
*
|
||||||
|
* The source string at buff is of length nbytes
|
||||||
|
* (it needn't be nul-terminated)
|
||||||
|
*
|
||||||
|
* *buff_uchar receives a pointer to the palloc'd result string, and
|
||||||
|
* the function's result is the number of UChars generated.
|
||||||
|
*
|
||||||
|
* The result string is nul-terminated, though most callers rely on the
|
||||||
|
* result length instead.
|
||||||
|
*/
|
||||||
int32_t
|
int32_t
|
||||||
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
|
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
|
||||||
{
|
{
|
||||||
|
@ -1494,18 +1506,30 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
|
||||||
|
|
||||||
init_icu_converter();
|
init_icu_converter();
|
||||||
|
|
||||||
len_uchar = 2 * nbytes; /* max length per docs */
|
len_uchar = 2 * nbytes + 1; /* max length per docs */
|
||||||
*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
|
*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
|
||||||
status = U_ZERO_ERROR;
|
status = U_ZERO_ERROR;
|
||||||
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
|
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar,
|
||||||
|
buff, nbytes, &status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
|
(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
|
||||||
return len_uchar;
|
return len_uchar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert a string of UChars into the database encoding.
|
||||||
|
*
|
||||||
|
* The source string at buff_uchar is of length len_uchar
|
||||||
|
* (it needn't be nul-terminated)
|
||||||
|
*
|
||||||
|
* *result receives a pointer to the palloc'd result string, and the
|
||||||
|
* function's result is the number of bytes generated (not counting nul).
|
||||||
|
*
|
||||||
|
* The result string is nul-terminated.
|
||||||
|
*/
|
||||||
int32_t
|
int32_t
|
||||||
icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
|
icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||||
{
|
{
|
||||||
UErrorCode status;
|
UErrorCode status;
|
||||||
int32_t len_result;
|
int32_t len_result;
|
||||||
|
@ -1515,13 +1539,14 @@ icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
|
||||||
len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
|
len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
|
||||||
*result = palloc(len_result + 1);
|
*result = palloc(len_result + 1);
|
||||||
status = U_ZERO_ERROR;
|
status = U_ZERO_ERROR;
|
||||||
ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status);
|
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
|
||||||
|
buff_uchar, len_uchar, &status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
|
(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
|
||||||
return len_result;
|
return len_result;
|
||||||
}
|
}
|
||||||
#endif
|
#endif /* USE_ICU */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
|
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
|
||||||
|
|
|
@ -1569,6 +1569,9 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
|
||||||
result = ucol_strcoll(mylocale->info.icu.ucol,
|
result = ucol_strcoll(mylocale->info.icu.ucol,
|
||||||
uchar1, ulen1,
|
uchar1, ulen1,
|
||||||
uchar2, ulen2);
|
uchar2, ulen2);
|
||||||
|
|
||||||
|
pfree(uchar1);
|
||||||
|
pfree(uchar2);
|
||||||
}
|
}
|
||||||
#else /* not USE_ICU */
|
#else /* not USE_ICU */
|
||||||
/* shouldn't happen */
|
/* shouldn't happen */
|
||||||
|
@ -2155,6 +2158,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
|
||||||
result = ucol_strcoll(sss->locale->info.icu.ucol,
|
result = ucol_strcoll(sss->locale->info.icu.ucol,
|
||||||
uchar1, ulen1,
|
uchar1, ulen1,
|
||||||
uchar2, ulen2);
|
uchar2, ulen2);
|
||||||
|
|
||||||
|
pfree(uchar1);
|
||||||
|
pfree(uchar2);
|
||||||
}
|
}
|
||||||
#else /* not USE_ICU */
|
#else /* not USE_ICU */
|
||||||
/* shouldn't happen */
|
/* shouldn't happen */
|
||||||
|
@ -2279,7 +2285,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
|
||||||
Size bsize;
|
Size bsize;
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
int32_t ulen = -1;
|
int32_t ulen = -1;
|
||||||
UChar *uchar;
|
UChar *uchar = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -2354,7 +2360,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
|
||||||
&status);
|
&status);
|
||||||
if (U_FAILURE(status))
|
if (U_FAILURE(status))
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
(errmsg("sort key generation failed: %s", u_errorName(status))));
|
(errmsg("sort key generation failed: %s",
|
||||||
|
u_errorName(status))));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
|
bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
|
||||||
|
@ -2394,6 +2401,11 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
|
||||||
* okay. See remarks on bytea case above.)
|
* okay. See remarks on bytea case above.)
|
||||||
*/
|
*/
|
||||||
memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
|
memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
|
||||||
|
|
||||||
|
#ifdef USE_ICU
|
||||||
|
if (uchar)
|
||||||
|
pfree(uchar);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -93,7 +93,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
|
||||||
|
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
||||||
extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
|
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
||||||
|
|
Loading…
Reference in New Issue