Handle the "und" locale in ICU versions 54 and older.

The "und" locale is an alternative spelling of the root locale, but it
was not recognized until ICU 55. To maintain common behavior across
all supported ICU versions, check for "und" and replace with "root"
before opening.

Previously, the lack of support for "und" was dangerous, because
versions 54 and older fall back to the environment when a locale is
not found. If the user specified "und" for the language (which is
expected and documented), it could not only resolve to the wrong
collator, but the collator it resolved to could also change
unexpectedly whenever the environment changed (which could lead to
corrupt indexes).

This effectively reverts commit d72900bded, which worked around the
problem for the built-in "unicode" collation; that workaround is no
longer necessary.

Discussion: https://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Discussion: https://postgr.es/m/0c6fa66f2753217d2a40480a96bd2ccf023536a1.camel@j-davis.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
Jeff Davis 2023-03-23 09:50:06 -07:00
parent 949e2e7c4f
commit 3b50275b12
4 changed files with 44 additions and 1 deletions

View File

@ -2503,6 +2503,7 @@ pg_ucol_open(const char *loc_str)
{
UCollator *collator;
UErrorCode status;
char *fixed_str = NULL;
/*
* Must never open default collator, because it depends on the environment
@ -2517,6 +2518,36 @@ pg_ucol_open(const char *loc_str)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("opening default collator is not supported")));
/*
* In ICU versions 54 and earlier, "und" is not a recognized spelling of
* the root locale. If the first component of the locale is "und", replace
* with "root" before opening.
*/
if (U_ICU_VERSION_MAJOR_NUM < 55)
{
char lang[ULOC_LANG_CAPACITY];
status = U_ZERO_ERROR;
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
if (U_FAILURE(status))
{
ereport(ERROR,
(errmsg("could not get language from locale \"%s\": %s",
loc_str, u_errorName(status))));
}
if (strcmp(lang, "und") == 0)
{
const char *remainder = loc_str + strlen("und");
fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
strcpy(fixed_str, "root");
strcat(fixed_str, remainder);
loc_str = fixed_str;
}
}
status = U_ZERO_ERROR;
collator = ucol_open(loc_str, &status);
if (U_FAILURE(status))
@ -2527,6 +2558,9 @@ pg_ucol_open(const char *loc_str)
if (U_ICU_VERSION_MAJOR_NUM < 54)
icu_set_collation_attributes(collator, loc_str);
if (fixed_str != NULL)
pfree(fixed_str);
return collator;
}

View File

@ -1701,7 +1701,7 @@ setup_collation(FILE *cmdfd)
* that they win if libc defines a locale with the same name.
*/
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, '');\n\n",
"VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"

View File

@ -1312,6 +1312,13 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
t
(1 row)
CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
?column?
----------
t
(1 row)
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);
CREATE TABLE test3cs (x text COLLATE case_sensitive);

View File

@ -521,6 +521,8 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
-- test language tags
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
CREATE TABLE test1cs (x text COLLATE case_sensitive);
CREATE TABLE test2cs (x text COLLATE case_sensitive);