Support language tags in older ICU versions (53 and earlier).

By calling uloc_canonicalize() before parsing the attributes, the
existing locale attribute parsing logic works on language tags as
well.

Fix a small memory leak, too: the lowercased copy of the locale string
made in icu_set_collation_attributes() was never freed.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
Jeff Davis 2023-03-21 15:49:18 -07:00
parent e8e1f96c49
commit 869650fa86
4 changed files with 50 additions and 11 deletions

View File

@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
const char *name;
char *langtag;
char *icucomment;
const char *iculocstr;
Oid collid;
if (i == -1)
@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
name = uloc_getAvailable(i);
langtag = get_icu_language_tag(name);
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
/*
* Be paranoid about not allowing any non-ASCII strings into
* pg_collation
*/
if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
if (!pg_is_ascii(langtag))
continue;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
COLLPROVIDER_ICU, true, -1,
NULL, NULL, iculocstr, NULL,
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
NULL, NULL, langtag, NULL,
get_collation_actual_version(COLLPROVIDER_ICU, langtag),
true, true);
if (OidIsValid(collid))
{

View File

@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
}
/*
* Parse collation attributes and apply them to the open collator. This takes
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
* applies the key-value arguments.
* Parse collation attributes from the given locale string and apply them to
* the open collator.
*
* First, the locale string is canonicalized to an ICU format locale ID such
* as "und@colStrength=primary;colCaseLevel=yes". Then the key-value
* arguments are parsed from that locale ID and applied to the collator.
*
* Starting with ICU version 54, the attributes are processed automatically by
* ucol_open(), so this is only necessary for emulating this behavior on older
@ -2646,9 +2649,34 @@ pg_attribute_unused()
static void
icu_set_collation_attributes(UCollator *collator, const char *loc)
{
char *str = asc_tolower(loc, strlen(loc));
UErrorCode status;
int32_t len;
char *icu_locale_id;
char *lower_str;
char *str;
str = strchr(str, '@');
/*
* The input locale may be a BCP 47 language tag, e.g.
* "und-u-kc-ks-level1", which expresses the same attributes in a
* different form. It will be converted to the equivalent ICU format
* locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
* uloc_canonicalize().
*/
status = U_ZERO_ERROR;
len = uloc_canonicalize(loc, NULL, 0, &status);
icu_locale_id = palloc(len + 1);
status = U_ZERO_ERROR;
len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("canonicalization failed for locale string \"%s\": %s",
loc, u_errorName(status))));
lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
pfree(icu_locale_id);
str = strchr(lower_str, '@');
if (!str)
return;
str++;
@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
char *value;
UColAttribute uattr;
UColAttributeValue uvalue;
UErrorCode status;
status = U_ZERO_ERROR;
@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
loc, u_errorName(status))));
}
}
pfree(lower_str);
}
#endif /* USE_ICU */

View File

@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
t | t
(1 row)
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
?column?
----------
t
(1 row)
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);

View File

@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);