Allow tailoring of ICU locales with custom rules

This exposes the ICU facility to add custom collation rules to a
standard collation.

New options are added to CREATE COLLATION, CREATE DATABASE, createdb,
and initdb to set the rules.

Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Discussion: https://www.postgresql.org/message-id/flat/821c71a4-6ef0-d366-9acf-bb8e367f739f@enterprisedb.com
This commit is contained in:
Peter Eisentraut 2023-03-08 16:35:42 +01:00
parent b1534ed99d
commit 30a53b7929
22 changed files with 380 additions and 59 deletions

View File

@ -2428,6 +2428,15 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>collicurules</structfield> <type>text</type>
</para>
<para>
ICU collation rules for this collation object
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>collversion</structfield> <type>text</type>
@ -3106,6 +3115,15 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>daticurules</structfield> <type>text</type>
</para>
<para>
ICU collation rules for this database
</para></entry>
</row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>datcollversion</structfield> <type>text</type>

View File

@ -27,6 +27,7 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
[ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
[ PROVIDER = <replaceable>provider</replaceable>, ]
[ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
[ RULES = <replaceable>rules</replaceable>, ]
[ VERSION = <replaceable>version</replaceable> ]
)
CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
@ -149,6 +150,19 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable>rules</replaceable></term>
<listitem>
<para>
Specifies additional collation rules to customize the behavior of the
collation. This is supported only for the ICU provider. See <ulink
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
for details on the syntax.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><replaceable>version</replaceable></term>
@ -228,6 +242,14 @@ CREATE COLLATION german_phonebook (provider = icu, locale = 'de-u-co-phonebk');
</programlisting>
</para>
<para>
To create a collation using the ICU provider, based on the English ICU
locale, with custom rules:
<programlisting>
<![CDATA[CREATE COLLATION en_custom (provider = icu, locale = 'en', rules = '&a < g');]]>
</programlisting>
</para>
<para>
To create a collation from an existing collation:
<programlisting>

View File

@ -30,6 +30,7 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
[ LC_COLLATE [=] <replaceable class="parameter">lc_collate</replaceable> ]
[ LC_CTYPE [=] <replaceable class="parameter">lc_ctype</replaceable> ]
[ ICU_LOCALE [=] <replaceable class="parameter">icu_locale</replaceable> ]
[ ICU_RULES [=] <replaceable class="parameter">icu_rules</replaceable> ]
[ LOCALE_PROVIDER [=] <replaceable class="parameter">locale_provider</replaceable> ]
[ COLLATION_VERSION = <replaceable>collation_version</replaceable> ]
[ TABLESPACE [=] <replaceable class="parameter">tablespace_name</replaceable> ]
@ -192,6 +193,19 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
</listitem>
</varlistentry>
<varlistentry id="create-database-icu-rules">
<term><replaceable class="parameter">icu_rules</replaceable></term>
<listitem>
<para>
Specifies additional collation rules to customize the behavior of the
default collation of this database. This is supported only when the locale provider is ICU.
See <ulink
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
for details on the syntax.
</para>
</listitem>
</varlistentry>
<varlistentry id="create-database-locale-provider">
<term><replaceable>locale_provider</replaceable></term>

View File

@ -157,6 +157,16 @@ PostgreSQL documentation
</listitem>
</varlistentry>
<varlistentry>
<term><option>--icu-rules=<replaceable class="parameter">rules</replaceable></option></term>
<listitem>
<para>
Specifies additional collation rules to customize the behavior of the
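default collation of this database. This is supported only when the
locale provider is ICU. See <ulink
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
for details on the syntax.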
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
<listitem>

View File

@ -239,6 +239,16 @@ PostgreSQL documentation
</listitem>
</varlistentry>
<varlistentry id="app-initdb-icu-rules">
<term><option>--icu-rules=<replaceable>rules</replaceable></option></term>
<listitem>
<para>
Specifies additional collation rules to customize the behavior of the
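default collation. This is supported only when the locale provider is
ICU. See <ulink
url="https://unicode-org.github.io/icu/userguide/collation/customization/"/>
for details on the syntax.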
</para>
</listitem>
</varlistentry>
<varlistentry id="app-initdb-data-checksums" xreflabel="data checksums">
<term><option>-k</option></term>
<term><option>--data-checksums</option></term>

View File

@ -50,6 +50,7 @@ CollationCreate(const char *collname, Oid collnamespace,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *colliculocale,
const char *collicurules,
const char *collversion,
bool if_not_exists,
bool quiet)
@ -194,6 +195,10 @@ CollationCreate(const char *collname, Oid collnamespace,
values[Anum_pg_collation_colliculocale - 1] = CStringGetTextDatum(colliculocale);
else
nulls[Anum_pg_collation_colliculocale - 1] = true;
if (collicurules)
values[Anum_pg_collation_collicurules - 1] = CStringGetTextDatum(collicurules);
else
nulls[Anum_pg_collation_collicurules - 1] = true;
if (collversion)
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion);
else

View File

@ -64,10 +64,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
DefElem *deterministicEl = NULL;
DefElem *rulesEl = NULL;
DefElem *versionEl = NULL;
char *collcollate;
char *collctype;
char *colliculocale;
char *collicurules;
bool collisdeterministic;
int collencoding;
char collprovider;
@ -99,6 +101,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
defelp = &providerEl;
else if (strcmp(defel->defname, "deterministic") == 0)
defelp = &deterministicEl;
else if (strcmp(defel->defname, "rules") == 0)
defelp = &rulesEl;
else if (strcmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
@ -161,6 +165,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
else
colliculocale = NULL;
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
collicurules = TextDatumGetCString(datum);
else
collicurules = NULL;
ReleaseSysCache(tp);
/*
@ -182,6 +192,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate = NULL;
collctype = NULL;
colliculocale = NULL;
collicurules = NULL;
if (providerEl)
collproviderstr = defGetString(providerEl);
@ -191,6 +202,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
else
collisdeterministic = true;
if (rulesEl)
collicurules = defGetString(rulesEl);
if (versionEl)
collversion = defGetString(versionEl);
@ -297,6 +311,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate,
collctype,
colliculocale,
collicurules,
collversion,
if_not_exists,
false); /* not quiet */
@ -680,7 +695,7 @@ create_collation_from_locale(const char *locale, int nspid,
*/
collid = CollationCreate(locale, nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
locale, locale, NULL,
locale, locale, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
if (OidIsValid(collid))
@ -755,7 +770,7 @@ win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
collid = CollationCreate(alias, param->nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
localebuf, localebuf, NULL,
localebuf, localebuf, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
if (OidIsValid(collid))
@ -889,7 +904,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
collid = CollationCreate(alias, nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
locale, locale, NULL,
locale, locale, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
if (OidIsValid(collid))
@ -951,7 +966,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
COLLPROVIDER_ICU, true, -1,
NULL, NULL, iculocstr,
NULL, NULL, iculocstr, NULL,
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
true, true);
if (OidIsValid(collid))

View File

@ -119,6 +119,7 @@ static bool get_db_info(const char *name, LOCKMODE lockmode,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
char **dbIcurules,
char *dbLocProvider,
char **dbCollversion);
static void remove_dbtablespaces(Oid db_id);
@ -675,6 +676,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
char *src_collate = NULL;
char *src_ctype = NULL;
char *src_iculocale = NULL;
char *src_icurules = NULL;
char src_locprovider = '\0';
char *src_collversion = NULL;
bool src_istemplate;
@ -698,6 +700,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
DefElem *dcollate = NULL;
DefElem *dctype = NULL;
DefElem *diculocale = NULL;
DefElem *dicurules = NULL;
DefElem *dlocprovider = NULL;
DefElem *distemplate = NULL;
DefElem *dallowconnections = NULL;
@ -710,6 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
char *dbcollate = NULL;
char *dbctype = NULL;
char *dbiculocale = NULL;
char *dbicurules = NULL;
char dblocprovider = '\0';
char *canonname;
int encoding = -1;
@ -775,6 +779,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
errorConflictingDefElem(defel, pstate);
diculocale = defel;
}
else if (strcmp(defel->defname, "icu_rules") == 0)
{
if (dicurules)
errorConflictingDefElem(defel, pstate);
dicurules = defel;
}
else if (strcmp(defel->defname, "locale_provider") == 0)
{
if (dlocprovider)
@ -893,6 +903,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dbctype = defGetString(dctype);
if (diculocale && diculocale->arg)
dbiculocale = defGetString(diculocale);
if (dicurules && dicurules->arg)
dbicurules = defGetString(dicurules);
if (dlocprovider && dlocprovider->arg)
{
char *locproviderstr = defGetString(dlocprovider);
@ -958,7 +970,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
&src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_allowconn,
&src_frozenxid, &src_minmxid, &src_deftablespace,
&src_collate, &src_ctype, &src_iculocale, &src_locprovider,
&src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider,
&src_collversion))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
@ -1006,6 +1018,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dblocprovider = src_locprovider;
if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
dbiculocale = src_iculocale;
if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
dbicurules = src_icurules;
/* Some encodings are client only */
if (!PG_VALID_BE_ENCODING(encoding))
@ -1097,6 +1111,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (dblocprovider == COLLPROVIDER_ICU)
{
char *val1;
char *val2;
Assert(dbiculocale);
Assert(src_iculocale);
if (strcmp(dbiculocale, src_iculocale) != 0)
@ -1105,6 +1122,19 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
dbiculocale, src_iculocale),
errhint("Use the same ICU locale as in the template database, or use template0 as template.")));
val1 = dbicurules;
if (!val1)
val1 = "";
val2 = src_icurules;
if (!val2)
val2 = "";
if (strcmp(val1, val2) != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new ICU collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)",
val1, val2),
errhint("Use the same ICU collation rules as in the template database, or use template0 as template.")));
}
}
@ -1313,6 +1343,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale);
else
new_record_nulls[Anum_pg_database_daticulocale - 1] = true;
if (dbicurules)
new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules);
else
new_record_nulls[Anum_pg_database_daticurules - 1] = true;
if (dbcollversion)
new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion);
else
@ -1526,7 +1560,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
&db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
&db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
{
if (!missing_ok)
{
@ -1726,7 +1760,7 @@ RenameDatabase(const char *oldname, const char *newname)
rel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", oldname)));
@ -1836,7 +1870,7 @@ movedb(const char *dbname, const char *tblspcname)
pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL))
NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", dbname)));
@ -2599,6 +2633,7 @@ get_db_info(const char *name, LOCKMODE lockmode,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
char **dbIcurules,
char *dbLocProvider,
char **dbCollversion)
{
@ -2715,6 +2750,14 @@ get_db_info(const char *name, LOCKMODE lockmode,
else
*dbIculocale = TextDatumGetCString(datum);
}
if (dbIcurules)
{
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull);
if (isnull)
*dbIcurules = NULL;
else
*dbIcurules = TextDatumGetCString(datum);
}
if (dbCollversion)
{
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);

View File

@ -69,6 +69,7 @@
#ifdef USE_ICU
#include <unicode/ucnv.h>
#include <unicode/ustring.h>
#endif
#ifdef __GLIBC__
@ -1421,6 +1422,7 @@ struct pg_locale_struct default_locale;
void
make_icu_collator(const char *iculocstr,
const char *icurules,
struct pg_locale_struct *resultp)
{
#ifdef USE_ICU
@ -1437,6 +1439,35 @@ make_icu_collator(const char *iculocstr,
if (U_ICU_VERSION_MAJOR_NUM < 54)
icu_set_collation_attributes(collator, iculocstr);
/*
* If rules are specified, we extract the rules of the standard collation,
* add our own rules, and make a new collator with the combined rules.
*/
if (icurules)
{
const UChar *default_rules;
UChar *agg_rules;
UChar *my_rules;
int32_t length;
default_rules = ucol_getRules(collator, &length);
icu_to_uchar(&my_rules, icurules, strlen(icurules));
agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
u_strcpy(agg_rules, default_rules);
u_strcat(agg_rules, my_rules);
ucol_close(collator);
status = U_ZERO_ERROR;
collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
iculocstr, icurules, u_errorName(status))));
}
/* We will leak this string if the caller errors later :-( */
resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
resultp->info.icu.ucol = collator;
@ -1608,11 +1639,19 @@ pg_newlocale_from_collation(Oid collid)
else if (collform->collprovider == COLLPROVIDER_ICU)
{
const char *iculocstr;
const char *icurules;
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
Assert(!isnull);
iculocstr = TextDatumGetCString(datum);
make_icu_collator(iculocstr, &result);
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
else
icurules = NULL;
make_icu_collator(iculocstr, icurules, &result);
}
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,

View File

@ -421,10 +421,19 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
if (dbform->datlocprovider == COLLPROVIDER_ICU)
{
char *icurules;
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull);
Assert(!isnull);
iculocale = TextDatumGetCString(datum);
make_icu_collator(iculocale, &default_locale);
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
else
icurules = NULL;
make_icu_collator(iculocale, icurules, &default_locale);
}
else
iculocale = NULL;

View File

@ -135,6 +135,7 @@ static char *lc_time = NULL;
static char *lc_messages = NULL;
static char locale_provider = COLLPROVIDER_LIBC;
static char *icu_locale = NULL;
static char *icu_rules = NULL;
static const char *default_text_search_config = NULL;
static char *username = NULL;
static bool pwprompt = false;
@ -1312,7 +1313,10 @@ bootstrap_template1(void)
escape_quotes_bki(lc_ctype));
bki_lines = replace_token(bki_lines, "ICU_LOCALE",
locale_provider == COLLPROVIDER_ICU ? escape_quotes_bki(icu_locale) : "_null_");
icu_locale ? escape_quotes_bki(icu_locale) : "_null_");
bki_lines = replace_token(bki_lines, "ICU_RULES",
icu_rules ? escape_quotes_bki(icu_rules) : "_null_");
sprintf(buf, "%c", locale_provider);
bki_lines = replace_token(bki_lines, "LOCALE_PROVIDER", buf);
@ -2107,6 +2111,7 @@ usage(const char *progname)
printf(_(" -E, --encoding=ENCODING set default encoding for new databases\n"));
printf(_(" -g, --allow-group-access allow group read/execute on data directory\n"));
printf(_(" --icu-locale=LOCALE set ICU locale ID for new databases\n"));
printf(_(" --icu-rules=RULES set additional ICU collation rules for new databases\n"));
printf(_(" -k, --data-checksums use data page checksums\n"));
printf(_(" --locale=LOCALE set default locale for new databases\n"));
printf(_(" --lc-collate=, --lc-ctype=, --lc-messages=LOCALE\n"
@ -2767,6 +2772,7 @@ main(int argc, char *argv[])
{"discard-caches", no_argument, NULL, 14},
{"locale-provider", required_argument, NULL, 15},
{"icu-locale", required_argument, NULL, 16},
{"icu-rules", required_argument, NULL, 17},
{NULL, 0, NULL, 0}
};
@ -2924,6 +2930,9 @@ main(int argc, char *argv[])
case 16:
icu_locale = pg_strdup(optarg);
break;
case 17:
icu_rules = pg_strdup(optarg);
break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@ -2954,6 +2963,10 @@ main(int argc, char *argv[])
pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
"--icu-locale", "icu");
if (icu_rules && locale_provider != COLLPROVIDER_ICU)
pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
"--icu-rules", "icu");
atexit(cleanup_directories_atexit);
/* If we only need to fsync, just do it and exit */

View File

@ -2843,6 +2843,7 @@ dumpDatabase(Archive *fout)
i_collate,
i_ctype,
i_daticulocale,
i_daticurules,
i_frozenxid,
i_minmxid,
i_datacl,
@ -2861,6 +2862,7 @@ dumpDatabase(Archive *fout)
*collate,
*ctype,
*iculocale,
*icurules,
*datistemplate,
*datconnlimit,
*tablespace;
@ -2887,6 +2889,10 @@ dumpDatabase(Archive *fout)
appendPQExpBufferStr(dbQry, "datlocprovider, daticulocale, datcollversion, ");
else
appendPQExpBufferStr(dbQry, "'c' AS datlocprovider, NULL AS daticulocale, NULL AS datcollversion, ");
if (fout->remoteVersion >= 160000)
appendPQExpBufferStr(dbQry, "daticurules, ");
else
appendPQExpBufferStr(dbQry, "NULL AS daticurules, ");
appendPQExpBufferStr(dbQry,
"(SELECT spcname FROM pg_tablespace t WHERE t.oid = dattablespace) AS tablespace, "
"shobj_description(oid, 'pg_database') AS description "
@ -2904,6 +2910,7 @@ dumpDatabase(Archive *fout)
i_collate = PQfnumber(res, "datcollate");
i_ctype = PQfnumber(res, "datctype");
i_daticulocale = PQfnumber(res, "daticulocale");
i_daticurules = PQfnumber(res, "daticurules");
i_frozenxid = PQfnumber(res, "datfrozenxid");
i_minmxid = PQfnumber(res, "datminmxid");
i_datacl = PQfnumber(res, "datacl");
@ -2925,6 +2932,10 @@ dumpDatabase(Archive *fout)
iculocale = PQgetvalue(res, 0, i_daticulocale);
else
iculocale = NULL;
if (!PQgetisnull(res, 0, i_daticurules))
icurules = PQgetvalue(res, 0, i_daticurules);
else
icurules = NULL;
frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid));
minmxid = atooid(PQgetvalue(res, 0, i_minmxid));
dbdacl.acl = PQgetvalue(res, 0, i_datacl);
@ -2990,6 +3001,11 @@ dumpDatabase(Archive *fout)
appendPQExpBufferStr(creaQry, " ICU_LOCALE = ");
appendStringLiteralAH(creaQry, iculocale, fout);
}
if (icurules)
{
appendPQExpBufferStr(creaQry, " ICU_RULES = ");
appendStringLiteralAH(creaQry, icurules, fout);
}
/*
* For binary upgrade, carry over the collation version. For normal
@ -13153,10 +13169,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
int i_collcollate;
int i_collctype;
int i_colliculocale;
int i_collicurules;
const char *collprovider;
const char *collcollate;
const char *collctype;
const char *colliculocale;
const char *collicurules;
/* Do nothing in data-only dump */
if (dopt->dataOnly)
@ -13194,6 +13212,13 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
appendPQExpBufferStr(query,
"NULL AS colliculocale, ");
if (fout->remoteVersion >= 160000)
appendPQExpBufferStr(query,
"collicurules, ");
else
appendPQExpBufferStr(query,
"NULL AS collicurules, ");
appendPQExpBuffer(query,
"collcollate, "
"collctype "
@ -13208,6 +13233,7 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
i_collcollate = PQfnumber(res, "collcollate");
i_collctype = PQfnumber(res, "collctype");
i_colliculocale = PQfnumber(res, "colliculocale");
i_collicurules = PQfnumber(res, "collicurules");
collprovider = PQgetvalue(res, 0, i_collprovider);
@ -13226,6 +13252,11 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
else
colliculocale = NULL;
if (!PQgetisnull(res, 0, i_collicurules))
collicurules = PQgetvalue(res, 0, i_collicurules);
else
collicurules = NULL;
appendPQExpBuffer(delq, "DROP COLLATION %s;\n",
fmtQualifiedDumpable(collinfo));
@ -13271,6 +13302,12 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
}
}
if (collicurules)
{
appendPQExpBufferStr(q, ", rules = ");
appendStringLiteralAH(q, collicurules, fout);
}
/*
* For binary upgrade, carry over the collation version. For normal
* dump/restore, omit the version, so that it is computed upon restore.

View File

@ -923,38 +923,52 @@ listAllDbs(const char *pattern, bool verbose)
initPQExpBuffer(&buf);
printfPQExpBuffer(&buf,
"SELECT d.datname as \"%s\",\n"
" pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n"
" pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n"
" d.datcollate as \"%s\",\n"
" d.datctype as \"%s\",\n",
"SELECT\n"
" d.datname as \"%s\",\n"
" pg_catalog.pg_get_userbyid(d.datdba) as \"%s\",\n"
" pg_catalog.pg_encoding_to_char(d.encoding) as \"%s\",\n",
gettext_noop("Name"),
gettext_noop("Owner"),
gettext_noop("Encoding"),
gettext_noop("Encoding"));
if (pset.sversion >= 150000)
appendPQExpBuffer(&buf,
" CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
gettext_noop("Locale Provider"));
else
appendPQExpBuffer(&buf,
" 'libc' AS \"%s\",\n",
gettext_noop("Locale Provider"));
appendPQExpBuffer(&buf,
" d.datcollate as \"%s\",\n"
" d.datctype as \"%s\",\n",
gettext_noop("Collate"),
gettext_noop("Ctype"));
if (pset.sversion >= 150000)
appendPQExpBuffer(&buf,
" d.daticulocale as \"%s\",\n"
" CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
gettext_noop("ICU Locale"),
gettext_noop("Locale Provider"));
" d.daticulocale as \"%s\",\n",
gettext_noop("ICU Locale"));
else
appendPQExpBuffer(&buf,
" NULL as \"%s\",\n"
" 'libc' AS \"%s\",\n",
gettext_noop("ICU Locale"),
gettext_noop("Locale Provider"));
appendPQExpBufferStr(&buf, " ");
" NULL as \"%s\",\n",
gettext_noop("ICU Locale"));
if (pset.sversion >= 160000)
appendPQExpBuffer(&buf,
" d.daticurules as \"%s\",\n",
gettext_noop("ICU Rules"));
else
appendPQExpBuffer(&buf,
" NULL as \"%s\",\n",
gettext_noop("ICU Rules"));
appendPQExpBufferStr(&buf, " ");
printACLColumn(&buf, "d.datacl");
if (verbose)
appendPQExpBuffer(&buf,
",\n CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n"
" THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n"
" ELSE 'No Access'\n"
" END as \"%s\""
",\n t.spcname as \"%s\""
",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"",
",\n CASE WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')\n"
" THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))\n"
" ELSE 'No Access'\n"
" END as \"%s\""
",\n t.spcname as \"%s\""
",\n pg_catalog.shobj_description(d.oid, 'pg_database') as \"%s\"",
gettext_noop("Size"),
gettext_noop("Tablespace"),
gettext_noop("Description"));
@ -4849,52 +4863,64 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
PQExpBufferData buf;
PGresult *res;
printQueryOpt myopt = pset.popt;
static const bool translate_columns[] = {false, false, false, false, false, false, true, false};
static const bool translate_columns[] = {false, false, false, false, false, false, false, true, false};
initPQExpBuffer(&buf);
printfPQExpBuffer(&buf,
"SELECT n.nspname AS \"%s\",\n"
" c.collname AS \"%s\",\n"
" c.collcollate AS \"%s\",\n"
" c.collctype AS \"%s\"",
"SELECT\n"
" n.nspname AS \"%s\",\n"
" c.collname AS \"%s\",\n",
gettext_noop("Schema"),
gettext_noop("Name"),
gettext_noop("Name"));
if (pset.sversion >= 100000)
appendPQExpBuffer(&buf,
" CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
gettext_noop("Provider"));
else
appendPQExpBuffer(&buf,
" 'libc' AS \"%s\",\n",
gettext_noop("Provider"));
appendPQExpBuffer(&buf,
" c.collcollate AS \"%s\",\n"
" c.collctype AS \"%s\",\n",
gettext_noop("Collate"),
gettext_noop("Ctype"));
if (pset.sversion >= 150000)
appendPQExpBuffer(&buf,
",\n c.colliculocale AS \"%s\"",
" c.colliculocale AS \"%s\",\n",
gettext_noop("ICU Locale"));
else
appendPQExpBuffer(&buf,
",\n c.collcollate AS \"%s\"",
" c.collcollate AS \"%s\",\n",
gettext_noop("ICU Locale"));
if (pset.sversion >= 100000)
if (pset.sversion >= 160000)
appendPQExpBuffer(&buf,
",\n CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\"",
gettext_noop("Provider"));
" c.collicurules AS \"%s\",\n",
gettext_noop("ICU Rules"));
else
appendPQExpBuffer(&buf,
",\n 'libc' AS \"%s\"",
gettext_noop("Provider"));
" NULL AS \"%s\",\n",
gettext_noop("ICU Rules"));
if (pset.sversion >= 120000)
appendPQExpBuffer(&buf,
",\n CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"",
" CASE WHEN c.collisdeterministic THEN '%s' ELSE '%s' END AS \"%s\"",
gettext_noop("yes"), gettext_noop("no"),
gettext_noop("Deterministic?"));
else
appendPQExpBuffer(&buf,
",\n '%s' AS \"%s\"",
" '%s' AS \"%s\"",
gettext_noop("yes"),
gettext_noop("Deterministic?"));
if (verbose)
appendPQExpBuffer(&buf,
",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"",
",\n pg_catalog.obj_description(c.oid, 'pg_collation') AS \"%s\"",
gettext_noop("Description"));
appendPQExpBufferStr(&buf,

View File

@ -41,6 +41,7 @@ main(int argc, char *argv[])
{"maintenance-db", required_argument, NULL, 3},
{"locale-provider", required_argument, NULL, 4},
{"icu-locale", required_argument, NULL, 5},
{"icu-rules", required_argument, NULL, 6},
{NULL, 0, NULL, 0}
};
@ -67,6 +68,7 @@ main(int argc, char *argv[])
char *locale = NULL;
char *locale_provider = NULL;
char *icu_locale = NULL;
char *icu_rules = NULL;
PQExpBufferData sql;
@ -134,6 +136,9 @@ main(int argc, char *argv[])
case 5:
icu_locale = pg_strdup(optarg);
break;
case 6:
icu_rules = pg_strdup(optarg);
break;
default:
/* getopt_long already emitted a complaint */
pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@ -231,6 +236,11 @@ main(int argc, char *argv[])
appendPQExpBufferStr(&sql, " ICU_LOCALE ");
appendStringLiteralConn(&sql, icu_locale, conn);
}
if (icu_rules)
{
appendPQExpBufferStr(&sql, " ICU_RULES ");
appendStringLiteralConn(&sql, icu_rules, conn);
}
appendPQExpBufferChar(&sql, ';');
@ -288,6 +298,7 @@ help(const char *progname)
printf(_(" --lc-collate=LOCALE LC_COLLATE setting for the database\n"));
printf(_(" --lc-ctype=LOCALE LC_CTYPE setting for the database\n"));
printf(_(" --icu-locale=LOCALE ICU locale setting for the database\n"));
printf(_(" --icu-rules=RULES ICU rules setting for the database\n"));
printf(_(" --locale-provider={libc|icu}\n"
" locale provider for the database's default collation\n"));
printf(_(" -O, --owner=OWNER database user to own the new database\n"));

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202302221
#define CATALOG_VERSION_NO 202303081
#endif

View File

@ -43,6 +43,7 @@ CATALOG(pg_collation,3456,CollationRelationId)
text collcollate BKI_DEFAULT(_null_); /* LC_COLLATE setting */
text collctype BKI_DEFAULT(_null_); /* LC_CTYPE setting */
text colliculocale BKI_DEFAULT(_null_); /* ICU locale ID */
text collicurules BKI_DEFAULT(_null_); /* ICU collation rules */
text collversion BKI_DEFAULT(_null_); /* provider-dependent
* version of collation
* data */
@ -91,6 +92,7 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *colliculocale,
const char *collicurules,
const char *collversion,
bool if_not_exists,
bool quiet);

View File

@ -18,6 +18,6 @@
datlocprovider => 'LOCALE_PROVIDER', datistemplate => 't',
datallowconn => 't', datconnlimit => '-1', datfrozenxid => '0',
datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE',
datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE', datacl => '_null_' },
datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE', daticurules => 'ICU_RULES', datacl => '_null_' },
]

View File

@ -71,6 +71,9 @@ CATALOG(pg_database,1262,DatabaseRelationId) BKI_SHARED_RELATION BKI_ROWTYPE_OID
/* ICU locale ID */
text daticulocale;
/* ICU collation rules */
text daticurules;
/* provider-dependent version of collation data */
text datcollversion BKI_DEFAULT(_null_);

View File

@ -95,6 +95,7 @@ typedef struct pg_locale_struct *pg_locale_t;
extern PGDLLIMPORT struct pg_locale_struct default_locale;
extern void make_icu_collator(const char *iculocstr,
const char *icurules,
struct pg_locale_struct *resultp);
extern bool pg_locale_deterministic(pg_locale_t locale);

View File

@ -1190,6 +1190,36 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
t | t
(1 row)
-- rules
CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
CREATE TABLE test7 (a text);
-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu";
a
-----------
Abernathy
apple
bird
Boston
Graham
green
(6 rows)
SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
a
-----------
Abernathy
apple
green
bird
Boston
Graham
(6 rows)
DROP TABLE test7;
CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
ERROR: could not open collator for locale "" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
-- nondeterministic collations
CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);

View File

@ -6199,9 +6199,9 @@ List of schemas
(0 rows)
\dO "no.such.collation"
List of collations
Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic?
--------+------+---------+-------+------------+----------+----------------
List of collations
Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic?
--------+------+----------+---------+-------+------------+-----------+----------------
(0 rows)
\dp "no.such.access.privilege"
@ -6388,9 +6388,9 @@ cross-database references are not implemented: "no.such.schema"."no.such.languag
(0 rows)
\dO "no.such.schema"."no.such.collation"
List of collations
Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic?
--------+------+---------+-------+------------+----------+----------------
List of collations
Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic?
--------+------+----------+---------+-------+------------+-----------+----------------
(0 rows)
\dp "no.such.schema"."no.such.access.privilege"
@ -6531,9 +6531,9 @@ List of text search templates
(0 rows)
\dO regression."no.such.schema"."no.such.collation"
List of collations
Schema | Name | Collate | Ctype | ICU Locale | Provider | Deterministic?
--------+------+---------+-------+------------+----------+----------------
List of collations
Schema | Name | Provider | Collate | Ctype | ICU Locale | ICU Rules | Deterministic?
--------+------+----------+---------+-------+------------+-----------+----------------
(0 rows)
\dp regression."no.such.schema"."no.such.access.privilege"

View File

@ -472,6 +472,19 @@ CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=p
SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
-- rules
CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
CREATE TABLE test7 (a text);
-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
SELECT * FROM test7 ORDER BY a COLLATE "en-x-icu";
SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
DROP TABLE test7;
CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
-- nondeterministic collations
CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);