diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 4fc143025e..7114eb7b52 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -342,22 +342,14 @@ initdb --locale=sv_SE Locale Providers - PostgreSQL supports multiple locale - providers. This specifies which library supplies the locale - data. One standard provider name is libc, which uses - the locales provided by the operating system C library. These are the - locales used by most tools provided by the operating system. Another - provider is icu, which uses the external - ICUICU library. ICU locales can - only be used if support for ICU was configured when PostgreSQL was built. + A locale provider specifies which library defines the locale behavior for + collations and character classifications. The commands and tools that select the locale settings, as described - above, each have an option to select the locale provider. The examples - shown earlier all use the libc provider, which is the - default. Here is an example to initialize a database cluster using the - ICU provider: + above, each have an option to select the locale provider. Here is an + example to initialize a database cluster using the ICU provider: initdb --locale-provider=icu --icu-locale=en @@ -370,12 +362,76 @@ initdb --locale-provider=icu --icu-locale=en - Which locale provider to use depends on individual requirements. For most - basic uses, either provider will give adequate results. For the libc - provider, it depends on what the operating system offers; some operating - systems are better than others. For advanced uses, ICU offers more locale - variants and customization options. + Regardless of the locale provider, the operating system is still used to + provide some locale-aware behavior, such as messages (see ). + + + The available locale providers are listed below: + + + + + builtin + + + The builtin provider uses built-in operations. Only + the C locale is supported for this provider. + + + The C locale behavior is identical to the + C locale in the libc provider. When using this + locale, the behavior may depend on the database encoding. + + + + + + icu + + + The icu provider uses the external + ICUICU + library. PostgreSQL must have been + configured with support. + + + ICU provides collation and character classification behavior that is + independent of the operating system and database encoding, which is + preferable if you expect to transition to other platforms without any + change in results. LC_COLLATE and + LC_CTYPE can be set independently of the ICU + locale. + + + + For the ICU provider, results may depend on the version of the ICU + library used, as it is updated to reflect changes in natural language + over time. + + + + + + + libc + + + The libc provider uses the operating system's C + library. The collation and character classification behavior is + controlled by the settings LC_COLLATE and + LC_CTYPE, so they cannot be set independently. + + + + The same locale name may have different behavior on different + platforms when using the libc provider. + + + + + diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index 5cf9777764..98cd7d56be 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -96,6 +96,11 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM locale, you cannot specify either of those parameters. + + If provider is builtin, + then locale must be specified and set to + C. + @@ -129,9 +134,9 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM Specifies the provider to use for locale services associated with this - collation. Possible values are - icuICU - (if the server was built with ICU support) or libc. + collation. Possible values are builtin, + icuICU (if + the server was built with ICU support) or libc. libc is the default. See for details. diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 72927960eb..6c1fd95602 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -162,6 +162,11 @@ CREATE DATABASE name linkend="create-database-lc-ctype"/>, or individually. + + If is + builtin, then locale + must be specified and set to C. + The other locale settings , name Specifies the provider to use for the default collation in this - database. Possible values are + database. Possible values are builtin, icuICU (if the server was built with ICU support) or libc. By default, the provider is the same as that of the - + Specifies the locale provider for the database's default collation. diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index cd75cae10e..4760570f6a 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -286,6 +286,11 @@ PostgreSQL documentation environment that initdb runs in. Locale support is described in . + + If is builtin, + must be specified and set to + C. + @@ -314,8 +319,18 @@ PostgreSQL documentation + + + + + Specifies the locale name when the builtin provider is used. Locale support + is described in . + + + + - + This option sets the locale provider for databases created in the new diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index e42f2afccb..7f2f701229 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -64,7 +64,10 @@ CollationCreate(const char *collname, Oid collnamespace, Assert(collname); Assert(collnamespace); Assert(collowner); - Assert((collcollate && collctype) || colllocale); + Assert((collprovider == COLLPROVIDER_LIBC && + collcollate && collctype && !colllocale) || + (collprovider != COLLPROVIDER_LIBC && + !collcollate && !collctype && colllocale)); /* * Make sure there is no existing collation of same name & encoding. diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 59d7e17804..9059f8b3ef 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -66,7 +66,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e DefElem *versionEl = NULL; char *collcollate; char *collctype; - char *colllocale; + const char *colllocale; char *collicurules; bool collisdeterministic; int collencoding; @@ -213,7 +213,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (collproviderstr) { - if (pg_strcasecmp(collproviderstr, "icu") == 0) + if (pg_strcasecmp(collproviderstr, "builtin") == 0) + collprovider = COLLPROVIDER_BUILTIN; + else if (pg_strcasecmp(collproviderstr, "icu") == 0) collprovider = COLLPROVIDER_ICU; else if (pg_strcasecmp(collproviderstr, "libc") == 0) collprovider = COLLPROVIDER_LIBC; @@ -243,7 +245,18 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (lcctypeEl) collctype = defGetString(lcctypeEl); - if (collprovider == COLLPROVIDER_LIBC) + if (collprovider == COLLPROVIDER_BUILTIN) + { + if (!colllocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"%s\" must be specified", + "locale"))); + + colllocale = builtin_validate_locale(GetDatabaseEncoding(), + colllocale); + } + else if (collprovider == COLLPROVIDER_LIBC) { if (!collcollate) ereport(ERROR, @@ -303,7 +316,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("ICU rules cannot be specified unless locale provider is ICU"))); - if (collprovider == COLLPROVIDER_ICU) + if (collprovider == COLLPROVIDER_BUILTIN) + { + collencoding = GetDatabaseEncoding(); + } + else if (collprovider == COLLPROVIDER_ICU) { #ifdef USE_ICU /* @@ -332,7 +349,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e } if (!collversion) - collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colllocale : collcollate); + { + const char *locale; + + if (collprovider == COLLPROVIDER_LIBC) + locale = collcollate; + else + locale = colllocale; + + collversion = get_collation_actual_version(collprovider, locale); + } newoid = CollationCreate(collName, collNamespace, @@ -433,8 +459,13 @@ AlterCollation(AlterCollationStmt *stmt) datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); oldversion = isnull ? NULL : TextDatumGetCString(datum); - datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate); - newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum)); + if (collForm->collprovider == COLLPROVIDER_LIBC) + datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate); + else + datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale); + + newversion = get_collation_actual_version(collForm->collprovider, + TextDatumGetCString(datum)); /* cannot change from NULL to non-NULL or vice versa */ if ((!oldversion && newversion) || (oldversion && !newversion)) @@ -498,11 +529,16 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider; - datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, - provider == COLLPROVIDER_ICU ? - Anum_pg_database_datlocale : Anum_pg_database_datcollate); - - locale = TextDatumGetCString(datum); + if (provider == COLLPROVIDER_LIBC) + { + datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate); + locale = TextDatumGetCString(datum); + } + else + { + datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale); + locale = TextDatumGetCString(datum); + } ReleaseSysCache(dbtup); } @@ -519,11 +555,17 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider; Assert(provider != COLLPROVIDER_DEFAULT); - datum = SysCacheGetAttrNotNull(COLLOID, colltp, - provider == COLLPROVIDER_ICU ? - Anum_pg_collation_colllocale : Anum_pg_collation_collcollate); - locale = TextDatumGetCString(datum); + if (provider == COLLPROVIDER_LIBC) + { + datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate); + locale = TextDatumGetCString(datum); + } + else + { + datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale); + locale = TextDatumGetCString(datum); + } ReleaseSysCache(colltp); } diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 0f27d7b14c..65464fac8e 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -697,6 +697,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) DefElem *dtemplate = NULL; DefElem *dencoding = NULL; DefElem *dlocale = NULL; + DefElem *dbuiltinlocale = NULL; DefElem *dcollate = NULL; DefElem *dctype = NULL; DefElem *diculocale = NULL; @@ -712,7 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) const char *dbtemplate = NULL; char *dbcollate = NULL; char *dbctype = NULL; - char *dblocale = NULL; + const char *dblocale = NULL; char *dbicurules = NULL; char dblocprovider = '\0'; char *canonname; @@ -761,6 +762,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) errorConflictingDefElem(defel, pstate); dlocale = defel; } + else if (strcmp(defel->defname, "builtin_locale") == 0) + { + if (dbuiltinlocale) + errorConflictingDefElem(defel, pstate); + dbuiltinlocale = defel; + } else if (strcmp(defel->defname, "lc_collate") == 0) { if (dcollate) @@ -896,7 +903,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) { dbcollate = defGetString(dlocale); dbctype = defGetString(dlocale); + dblocale = defGetString(dlocale); } + if (dbuiltinlocale && dbuiltinlocale->arg) + dblocale = defGetString(dbuiltinlocale); if (dcollate && dcollate->arg) dbcollate = defGetString(dcollate); if (dctype && dctype->arg) @@ -909,7 +919,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) { char *locproviderstr = defGetString(dlocprovider); - if (pg_strcasecmp(locproviderstr, "icu") == 0) + if (pg_strcasecmp(locproviderstr, "builtin") == 0) + dblocprovider = COLLPROVIDER_BUILTIN; + else if (pg_strcasecmp(locproviderstr, "icu") == 0) dblocprovider = COLLPROVIDER_ICU; else if (pg_strcasecmp(locproviderstr, "libc") == 0) dblocprovider = COLLPROVIDER_LIBC; @@ -1026,14 +1038,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) dbctype = src_ctype; if (dblocprovider == '\0') dblocprovider = src_locprovider; - if (dblocale == NULL && dblocprovider == COLLPROVIDER_ICU) - { - if (dlocale && dlocale->arg) - dblocale = defGetString(dlocale); - else - dblocale = src_locale; - } - if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU) + if (dblocale == NULL) + dblocale = src_locale; + if (dbicurules == NULL) dbicurules = src_icurules; /* Some encodings are client only */ @@ -1058,7 +1065,42 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) check_encoding_locale_matches(encoding, dbcollate, dbctype); - if (dblocprovider == COLLPROVIDER_ICU) + /* validate provider-specific parameters */ + if (dblocprovider != COLLPROVIDER_BUILTIN) + { + if (dbuiltinlocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("BUILTIN_LOCALE cannot be specified unless locale provider is builtin"))); + } + else if (dblocprovider != COLLPROVIDER_ICU) + { + if (diculocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("ICU locale cannot be specified unless locale provider is ICU"))); + + if (dbicurules) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("ICU rules cannot be specified unless locale provider is ICU"))); + } + + /* validate and canonicalize locale for the provider */ + if (dblocprovider == COLLPROVIDER_BUILTIN) + { + /* + * This would happen if template0 uses the libc provider but the new + * database uses builtin. + */ + if (!dblocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("LOCALE or BUILTIN_LOCALE must be specified"))); + + dblocale = builtin_validate_locale(encoding, dblocale); + } + else if (dblocprovider == COLLPROVIDER_ICU) { if (!(is_encoding_supported_by_icu(encoding))) ereport(ERROR, @@ -1097,18 +1139,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) icu_validate_locale(dblocale); } - else - { - if (dblocale) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("ICU locale cannot be specified unless locale provider is ICU"))); - if (dbicurules) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("ICU rules cannot be specified unless locale provider is ICU"))); - } + /* for libc, locale comes from datcollate and datctype */ + if (dblocprovider == COLLPROVIDER_LIBC) + dblocale = NULL; /* * Check that the new encoding and locale settings match the source @@ -1195,8 +1229,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (src_collversion && !dcollversion) { char *actual_versionstr; + const char *locale; - actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate); + if (dblocprovider == COLLPROVIDER_LIBC) + locale = dbcollate; + else + locale = dblocale; + + actual_versionstr = get_collation_actual_version(dblocprovider, locale); if (!actual_versionstr) ereport(ERROR, (errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined", @@ -1224,7 +1264,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * collation version, which is normally only the case for template0. */ if (dbcollversion == NULL) - dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate); + { + const char *locale; + + if (dblocprovider == COLLPROVIDER_LIBC) + locale = dbcollate; + else + locale = dblocale; + + dbcollversion = get_collation_actual_version(dblocprovider, locale); + } /* Resolve default tablespace for new database */ if (dtablespacename && dtablespacename->arg) @@ -1363,8 +1412,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) * block on the unique index, and fail after we commit). */ - Assert((dblocprovider == COLLPROVIDER_ICU && dblocale) || - (dblocprovider != COLLPROVIDER_ICU && !dblocale)); + Assert((dblocprovider != COLLPROVIDER_LIBC && dblocale) || + (dblocprovider == COLLPROVIDER_LIBC && !dblocale)); /* Form tuple */ new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid); @@ -2471,10 +2520,21 @@ AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt) datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull); oldversion = isnull ? NULL : TextDatumGetCString(datum); - datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull); - if (isnull) - elog(ERROR, "unexpected null in pg_database"); - newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum)); + if (datForm->datlocprovider == COLLPROVIDER_LIBC) + { + datum = heap_getattr(tuple, Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_database"); + } + else + { + datum = heap_getattr(tuple, Anum_pg_database_datlocale, RelationGetDescr(rel), &isnull); + if (isnull) + elog(ERROR, "unexpected null in pg_database"); + } + + newversion = get_collation_actual_version(datForm->datlocprovider, + TextDatumGetCString(datum)); /* cannot change from NULL to non-NULL or vice versa */ if ((!oldversion && newversion) || (oldversion && !newversion)) @@ -2669,8 +2729,13 @@ pg_database_collation_actual_version(PG_FUNCTION_ARGS) datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider; - datum = SysCacheGetAttrNotNull(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate); - version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum)); + if (datlocprovider == COLLPROVIDER_LIBC) + datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datcollate); + else + datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datlocale); + + version = get_collation_actual_version(datlocprovider, + TextDatumGetCString(datum)); ReleaseSysCache(tp); diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 036a463491..5f483b8dbc 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1680,6 +1680,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) else #endif { + Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; @@ -1798,6 +1800,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) else #endif { + Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; @@ -1917,6 +1921,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) else #endif { + Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC); + if (pg_database_encoding_max_length() > 1) { wchar_t *workspace; diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 77d5752dc8..39390fbe4e 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1268,7 +1268,18 @@ lookup_collation_cache(Oid collation, bool set_flags) elog(ERROR, "cache lookup failed for collation %u", collation); collform = (Form_pg_collation) GETSTRUCT(tp); - if (collform->collprovider == COLLPROVIDER_LIBC) + if (collform->collprovider == COLLPROVIDER_BUILTIN) + { + Datum datum; + const char *colllocale; + + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); + colllocale = TextDatumGetCString(datum); + + cache_entry->collate_is_c = true; + cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0); + } + else if (collform->collprovider == COLLPROVIDER_LIBC) { Datum datum; const char *collcollate; @@ -1319,16 +1330,30 @@ lc_collate_is_c(Oid collation) if (collation == DEFAULT_COLLATION_OID) { static int result = -1; - char *localeptr; - - if (default_locale.provider == COLLPROVIDER_ICU) - return false; + const char *localeptr; if (result >= 0) return (bool) result; - localeptr = setlocale(LC_COLLATE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_COLLATE setting"); + + if (default_locale.provider == COLLPROVIDER_BUILTIN) + { + result = true; + return (bool) result; + } + else if (default_locale.provider == COLLPROVIDER_ICU) + { + result = false; + return (bool) result; + } + else if (default_locale.provider == COLLPROVIDER_LIBC) + { + localeptr = setlocale(LC_CTYPE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + } + else + elog(ERROR, "unexpected collation provider '%c'", + default_locale.provider); if (strcmp(localeptr, "C") == 0) result = true; @@ -1372,16 +1397,29 @@ lc_ctype_is_c(Oid collation) if (collation == DEFAULT_COLLATION_OID) { static int result = -1; - char *localeptr; - - if (default_locale.provider == COLLPROVIDER_ICU) - return false; + const char *localeptr; if (result >= 0) return (bool) result; - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); + + if (default_locale.provider == COLLPROVIDER_BUILTIN) + { + localeptr = default_locale.info.builtin.locale; + } + else if (default_locale.provider == COLLPROVIDER_ICU) + { + result = false; + return (bool) result; + } + else if (default_locale.provider == COLLPROVIDER_LIBC) + { + localeptr = setlocale(LC_CTYPE, NULL); + if (!localeptr) + elog(ERROR, "invalid LC_CTYPE setting"); + } + else + elog(ERROR, "unexpected collation provider '%c'", + default_locale.provider); if (strcmp(localeptr, "C") == 0) result = true; @@ -1519,10 +1557,10 @@ pg_newlocale_from_collation(Oid collid) if (collid == DEFAULT_COLLATION_OID) { - if (default_locale.provider == COLLPROVIDER_ICU) - return &default_locale; - else + if (default_locale.provider == COLLPROVIDER_LIBC) return (pg_locale_t) 0; + else + return &default_locale; } cache_entry = lookup_collation_cache(collid, false); @@ -1547,7 +1585,19 @@ pg_newlocale_from_collation(Oid collid) result.provider = collform->collprovider; result.deterministic = collform->collisdeterministic; - if (collform->collprovider == COLLPROVIDER_LIBC) + if (collform->collprovider == COLLPROVIDER_BUILTIN) + { + const char *locstr; + + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); + locstr = TextDatumGetCString(datum); + + builtin_validate_locale(GetDatabaseEncoding(), locstr); + + result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext, + locstr); + } + else if (collform->collprovider == COLLPROVIDER_LIBC) { const char *collcollate; const char *collctype pg_attribute_unused(); @@ -1626,7 +1676,11 @@ pg_newlocale_from_collation(Oid collid) collversionstr = TextDatumGetCString(datum); - datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate); + Assert(collform->collprovider != COLLPROVIDER_BUILTIN); + if (collform->collprovider == COLLPROVIDER_LIBC) + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); + else + datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); actual_versionstr = get_collation_actual_version(collform->collprovider, TextDatumGetCString(datum)); @@ -1677,6 +1731,10 @@ get_collation_actual_version(char collprovider, const char *collcollate) { char *collversion = NULL; + /* the builtin collation provider is not versioned */ + if (collprovider == COLLPROVIDER_BUILTIN) + return NULL; + #ifdef USE_ICU if (collprovider == COLLPROVIDER_ICU) { @@ -2443,6 +2501,31 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, return result; } +const char * +builtin_validate_locale(int encoding, const char *locale) +{ + const char *canonical_name = NULL; + int required_encoding = -1; + + if (strcmp(locale, "C") == 0) + canonical_name = "C"; + + if (!canonical_name) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid locale name \"%s\" for builtin provider", + locale))); + + if (required_encoding >= 0 && encoding != required_encoding) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("encoding \"%s\" does not match locale \"%s\"", + pg_encoding_to_char(encoding), locale))); + + return canonical_name; +} + + #ifdef USE_ICU /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 2875bc97d3..0805398e24 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -423,7 +423,17 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect strcmp(ctype, "POSIX") == 0) database_ctype_is_c = true; - if (dbform->datlocprovider == COLLPROVIDER_ICU) + if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) + { + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + builtin_validate_locale(dbform->encoding, datlocale); + + default_locale.info.builtin.locale = MemoryContextStrdup( + TopMemoryContext, datlocale); + } + else if (dbform->datlocprovider == COLLPROVIDER_ICU) { char *icurules; @@ -461,10 +471,16 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect { char *actual_versionstr; char *collversionstr; + char *locale; collversionstr = TextDatumGetCString(datum); - actual_versionstr = get_collation_actual_version(dbform->datlocprovider, dbform->datlocprovider == COLLPROVIDER_ICU ? datlocale : collate); + if (dbform->datlocprovider == COLLPROVIDER_LIBC) + locale = collate; + else + locale = datlocale; + + actual_versionstr = get_collation_actual_version(dbform->datlocprovider, locale); if (!actual_versionstr) /* should not happen */ elog(WARNING, diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index de58002a5d..8d53ef4a1f 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -145,7 +145,9 @@ static char *lc_numeric = NULL; static char *lc_time = NULL; static char *lc_messages = NULL; static char locale_provider = COLLPROVIDER_LIBC; +static bool builtin_locale_specified = false; static char *datlocale = NULL; +static bool icu_locale_specified = false; static char *icu_rules = NULL; static const char *default_text_search_config = NULL; static char *username = NULL; @@ -2368,7 +2370,7 @@ setlocales(void) lc_monetary = locale; if (!lc_messages) lc_messages = locale; - if (!datlocale && locale_provider == COLLPROVIDER_ICU) + if (!datlocale && locale_provider != COLLPROVIDER_LIBC) datlocale = locale; } @@ -2395,14 +2397,20 @@ setlocales(void) lc_messages = canonname; #endif - if (locale_provider == COLLPROVIDER_ICU) + if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL) + pg_fatal("locale must be specified if provider is %s", + collprovider_name(locale_provider)); + + if (locale_provider == COLLPROVIDER_BUILTIN) + { + if (strcmp(datlocale, "C") != 0) + pg_fatal("invalid locale name \"%s\" for builtin provider", + datlocale); + } + else if (locale_provider == COLLPROVIDER_ICU) { char *langtag; - /* acquire default locale from the environment, if not specified */ - if (datlocale == NULL) - pg_fatal("ICU locale must be specified"); - /* canonicalize to a language tag */ langtag = icu_language_tag(datlocale); printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"), @@ -2447,7 +2455,8 @@ usage(const char *progname) " set default locale in the respective category for\n" " new databases (default taken from environment)\n")); printf(_(" --no-locale equivalent to --locale=C\n")); - printf(_(" --locale-provider={libc|icu}\n" + printf(_(" --builtin-locale=LOCALE set builtin locale name for new databases\n")); + printf(_(" --locale-provider={builtin|libc|icu}\n" " set default locale provider for new databases\n")); printf(_(" --pwfile=FILE read password for the new superuser from file\n")); printf(_(" -T, --text-search-config=CFG\n" @@ -2609,9 +2618,9 @@ setup_locale_encoding(void) else { printf(_("The database cluster will be initialized with this locale configuration:\n")); - printf(_(" provider: %s\n"), collprovider_name(locale_provider)); - if (datlocale) - printf(_(" ICU locale: %s\n"), datlocale); + printf(_(" default collation provider: %s\n"), collprovider_name(locale_provider)); + if (locale_provider != COLLPROVIDER_LIBC) + printf(_(" default collation locale: %s\n"), datlocale); printf(_(" LC_COLLATE: %s\n" " LC_CTYPE: %s\n" " LC_MESSAGES: %s\n" @@ -3104,9 +3113,10 @@ main(int argc, char *argv[]) {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, {"locale-provider", required_argument, NULL, 15}, - {"icu-locale", required_argument, NULL, 16}, - {"icu-rules", required_argument, NULL, 17}, - {"sync-method", required_argument, NULL, 18}, + {"builtin-locale", required_argument, NULL, 16}, + {"icu-locale", required_argument, NULL, 17}, + {"icu-rules", required_argument, NULL, 18}, + {"sync-method", required_argument, NULL, 19}, {NULL, 0, NULL, 0} }; @@ -3274,7 +3284,9 @@ main(int argc, char *argv[]) "-c debug_discard_caches=1"); break; case 15: - if (strcmp(optarg, "icu") == 0) + if (strcmp(optarg, "builtin") == 0) + locale_provider = COLLPROVIDER_BUILTIN; + else if (strcmp(optarg, "icu") == 0) locale_provider = COLLPROVIDER_ICU; else if (strcmp(optarg, "libc") == 0) locale_provider = COLLPROVIDER_LIBC; @@ -3283,11 +3295,16 @@ main(int argc, char *argv[]) break; case 16: datlocale = pg_strdup(optarg); + builtin_locale_specified = true; break; case 17: - icu_rules = pg_strdup(optarg); + datlocale = pg_strdup(optarg); + icu_locale_specified = true; break; case 18: + icu_rules = pg_strdup(optarg); + break; + case 19: if (!parse_sync_method(optarg, &sync_method)) exit(1); break; @@ -3317,7 +3334,11 @@ main(int argc, char *argv[]) exit(1); } - if (datlocale && locale_provider != COLLPROVIDER_ICU) + if (builtin_locale_specified && locale_provider != COLLPROVIDER_BUILTIN) + pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", + "--builtin-locale", "builtin"); + + if (icu_locale_specified && locale_provider != COLLPROVIDER_ICU) pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen", "--icu-locale", "icu"); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 594b20cc74..e719f70dae 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -117,7 +117,7 @@ if ($ENV{with_icu} eq 'yes') { command_fails_like( [ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ], - qr/initdb: error: ICU locale must be specified/, + qr/initdb: error: locale must be specified if provider is icu/, 'locale provider ICU requires --icu-locale'); command_ok( @@ -138,7 +138,7 @@ if ($ENV{with_icu} eq 'yes') '--lc-monetary=C', '--lc-time=C', "$tempdir/data4" ], - qr/^\s+ICU locale:\s+und\n/ms, + qr/^\s+default collation locale:\s+und\n/ms, 'options --locale-provider=icu --locale=und --lc-*=C'); command_fails_like( @@ -184,6 +184,42 @@ else 'locale provider ICU fails since no ICU support'); } +command_fails( + [ 'initdb', '--no-sync', '--locale-provider=builtin', "$tempdir/data6" ], + 'locale provider builtin fails without --locale'); + +command_ok( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--locale=C', + "$tempdir/data7" + ], + 'locale provider builtin with --locale'); + +command_ok( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--lc-ctype=C', + '--locale=C', "$tempdir/data10" + ], + 'locale provider builtin with --lc-ctype'); + +command_fails( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--icu-locale=en', + "$tempdir/dataX" + ], + 'fails for locale provider builtin with ICU locale'); + +command_fails( + [ + 'initdb', '--no-sync', + '--locale-provider=builtin', '--icu-rules=""', + "$tempdir/dataX" + ], + 'fails for locale provider builtin with ICU rules'); + command_fails( [ 'initdb', '--no-sync', '--locale-provider=xyz', "$tempdir/dataX" ], 'fails for invalid locale provider'); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 23e6217b73..171e591696 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3114,7 +3114,9 @@ dumpDatabase(Archive *fout) } appendPQExpBufferStr(creaQry, " LOCALE_PROVIDER = "); - if (datlocprovider[0] == 'c') + if (datlocprovider[0] == 'b') + appendPQExpBufferStr(creaQry, "builtin"); + else if (datlocprovider[0] == 'c') appendPQExpBufferStr(creaQry, "libc"); else if (datlocprovider[0] == 'i') appendPQExpBufferStr(creaQry, "icu"); @@ -3142,7 +3144,11 @@ dumpDatabase(Archive *fout) } if (locale) { - appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); + if (datlocprovider[0] == 'b') + appendPQExpBufferStr(creaQry, " BUILTIN_LOCALE = "); + else + appendPQExpBufferStr(creaQry, " ICU_LOCALE = "); + appendStringLiteralAH(creaQry, locale, fout); } @@ -13870,7 +13876,9 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) fmtQualifiedDumpable(collinfo)); appendPQExpBufferStr(q, "provider = "); - if (collprovider[0] == 'c') + if (collprovider[0] == 'b') + appendPQExpBufferStr(q, "builtin"); + else if (collprovider[0] == 'c') appendPQExpBufferStr(q, "libc"); else if (collprovider[0] == 'i') appendPQExpBufferStr(q, "icu"); @@ -13891,6 +13899,15 @@ dumpCollation(Archive *fout, const CollInfo *collinfo) /* no locale -- the default collation cannot be reloaded anyway */ } + else if (collprovider[0] == 'b') + { + if (collcollate || collctype || !colllocale || collicurules) + pg_log_warning("invalid collation \"%s\"", qcollname); + + appendPQExpBufferStr(q, ", locale = "); + appendStringLiteralAH(q, colllocale ? colllocale : "", + fout); + } else if (collprovider[0] == 'i') { if (fout->remoteVersion >= 150000) diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 34a459496e..ed79c0930b 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -104,19 +104,13 @@ if ($oldnode->pg_version >= 11) push @custom_opts, '--allow-group-access'; } -# Set up the locale settings for the original cluster, so that we -# can test that pg_upgrade copies the locale settings of template0 -# from the old to the new cluster. +my $old_provider_field; +my $old_datlocale_field; -my $original_encoding = "6"; # UTF-8 -my $original_provider = "c"; -my $original_locale = "C"; -my $original_datlocale = ""; -my $provider_field = "'c' AS datlocprovider"; -my $old_datlocale_field = "NULL AS datlocale"; -if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') +# account for field additions and changes +if ($oldnode->pg_version >= 15) { - $provider_field = "datlocprovider"; + $old_provider_field = "datlocprovider"; if ($oldnode->pg_version >= '17devel') { $old_datlocale_field = "datlocale"; @@ -125,18 +119,65 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') { $old_datlocale_field = "daticulocale AS datlocale"; } +} +else +{ + $old_provider_field = "'c' AS datlocprovider"; + $old_datlocale_field = "NULL AS datlocale"; +} + +# Set up the locale settings for the original cluster, so that we +# can test that pg_upgrade copies the locale settings of template0 +# from the old to the new cluster. + +my $original_enc_name; +my $original_provider; +my $original_datcollate = "C"; +my $original_datctype = "C"; +my $original_datlocale; + +if ($oldnode->pg_version >= '17devel') +{ + $original_enc_name = "UTF-8"; + $original_provider = "b"; + $original_datlocale = "C"; +} +elsif ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') +{ + $original_enc_name = "UTF-8"; $original_provider = "i"; $original_datlocale = "fr-CA"; } +else +{ + $original_enc_name = "SQL_ASCII"; + $original_provider = "c"; + $original_datlocale = ""; +} + +my %encodings = ('UTF-8' => 6, 'SQL_ASCII' => 0); +my $original_encoding = $encodings{$original_enc_name}; my @initdb_params = @custom_opts; -push @initdb_params, ('--encoding', 'UTF-8'); -push @initdb_params, ('--locale', $original_locale); -if ($original_provider eq "i") +push @initdb_params, ('--encoding', $original_enc_name); +push @initdb_params, ('--lc-collate', $original_datcollate); +push @initdb_params, ('--lc-ctype', $original_datctype); + +# add --locale-provider, if supported +my %provider_name = ('b' => 'builtin', 'i' => 'icu', 'c' => 'libc'); +if ($oldnode->pg_version >= 15) { - push @initdb_params, ('--locale-provider', 'icu'); - push @initdb_params, ('--icu-locale', 'fr-CA'); + push @initdb_params, + ('--locale-provider', $provider_name{$original_provider}); + if ($original_provider eq 'b') + { + push @initdb_params, ('--builtin-locale', $original_datlocale); + } + elsif ($original_provider eq 'i') + { + push @initdb_params, ('--icu-locale', $original_datlocale); + } } $node_params{extra} = \@initdb_params; @@ -146,10 +187,10 @@ $oldnode->start; my $result; $result = $oldnode->safe_psql( 'postgres', - "SELECT encoding, $provider_field, datcollate, datctype, $old_datlocale_field + "SELECT encoding, $old_provider_field, datcollate, datctype, $old_datlocale_field FROM pg_database WHERE datname='template0'"); is( $result, - "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale", + "$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale", "check locales in original cluster"); # The default location of the source code is the root of this directory. @@ -433,10 +474,10 @@ if (-d $log_path) # Test that upgraded cluster has original locale settings. $result = $newnode->safe_psql( 'postgres', - "SELECT encoding, $provider_field, datcollate, datctype, datlocale + "SELECT encoding, datlocprovider, datcollate, datctype, datlocale FROM pg_database WHERE datname='template0'"); is( $result, - "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale", + "$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale", "check that locales in new cluster match original cluster"); # Second dump from the upgraded instance. diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 68b2ea8872..1ab80eb7ca 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -926,7 +926,7 @@ listAllDbs(const char *pattern, bool verbose) gettext_noop("Encoding")); if (pset.sversion >= 150000) appendPQExpBuffer(&buf, - " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + " CASE d.datlocprovider WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Locale Provider")); else appendPQExpBuffer(&buf, @@ -4974,7 +4974,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem) if (pset.sversion >= 100000) appendPQExpBuffer(&buf, - " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", + " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n", gettext_noop("Provider")); else appendPQExpBuffer(&buf, diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index 14970a6a5f..007061e756 100644 --- a/src/bin/scripts/createdb.c +++ b/src/bin/scripts/createdb.c @@ -40,8 +40,9 @@ main(int argc, char *argv[]) {"locale", required_argument, NULL, 'l'}, {"maintenance-db", required_argument, NULL, 3}, {"locale-provider", required_argument, NULL, 4}, - {"icu-locale", required_argument, NULL, 5}, - {"icu-rules", required_argument, NULL, 6}, + {"builtin-locale", required_argument, NULL, 5}, + {"icu-locale", required_argument, NULL, 6}, + {"icu-rules", required_argument, NULL, 7}, {NULL, 0, NULL, 0} }; @@ -67,6 +68,7 @@ main(int argc, char *argv[]) char *lc_ctype = NULL; char *locale = NULL; char *locale_provider = NULL; + char *builtin_locale = NULL; char *icu_locale = NULL; char *icu_rules = NULL; @@ -134,9 +136,12 @@ main(int argc, char *argv[]) locale_provider = pg_strdup(optarg); break; case 5: - icu_locale = pg_strdup(optarg); + builtin_locale = pg_strdup(optarg); break; case 6: + icu_locale = pg_strdup(optarg); + break; + case 7: icu_rules = pg_strdup(optarg); break; default: @@ -216,6 +221,11 @@ main(int argc, char *argv[]) appendPQExpBufferStr(&sql, " LOCALE "); appendStringLiteralConn(&sql, locale, conn); } + if (builtin_locale) + { + appendPQExpBufferStr(&sql, " BUILTIN_LOCALE "); + appendStringLiteralConn(&sql, builtin_locale, conn); + } if (lc_collate) { appendPQExpBufferStr(&sql, " LC_COLLATE "); @@ -294,9 +304,10 @@ help(const char *progname) printf(_(" -l, --locale=LOCALE locale settings for the database\n")); printf(_(" --lc-collate=LOCALE LC_COLLATE setting for the database\n")); printf(_(" --lc-ctype=LOCALE LC_CTYPE setting for the database\n")); + printf(_(" --builtin-locale=LOCALE builtin locale setting for the database\n")); printf(_(" --icu-locale=LOCALE ICU locale setting for the database\n")); printf(_(" --icu-rules=RULES ICU rules setting for the database\n")); - printf(_(" --locale-provider={libc|icu}\n" + printf(_(" --locale-provider={builtin|libc|icu}\n" " locale provider for the database's default collation\n")); printf(_(" -O, --owner=OWNER database user to own the new database\n")); printf(_(" -S, --strategy=STRATEGY database creation strategy wal_log or file_copy\n")); diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index 37e47b0078..dfd635bfab 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -105,6 +105,66 @@ else 'create database with ICU fails since no ICU support'); } +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + 'tbuiltin1' + ], + 'create database with provider "builtin" fails without --locale'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', 'tbuiltin2' + ], + 'create database with provider "builtin" and locale "C"'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--lc-collate=C', + 'tbuiltin3' + ], + 'create database with provider "builtin" and LC_COLLATE=C'); + +$node->command_ok( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--lc-ctype=C', + 'tbuiltin4' + ], + 'create database with provider "builtin" and LC_CTYPE=C'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--icu-locale=en', + 'tbuiltin7' + ], + 'create database with provider "builtin" and ICU_LOCALE="en"'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template0', '--locale-provider=builtin', + '--locale=C', '--icu-rules=""', + 'tbuiltin8' + ], + 'create database with provider "builtin" and ICU_RULES=""'); + +$node->command_fails( + [ + 'createdb', '-T', + 'template1', '--locale-provider=builtin', + '--locale=C', 'tbuiltin9' + ], + 'create database with provider "builtin" not matching template'); + $node->command_fails([ 'createdb', 'foobar1' ], 'fails if database already exists'); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 0779311716..429989efd9 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202403131 +#define CATALOG_VERSION_NO 202403132 #endif diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index 7396ff10c4..938432e8a4 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -23,9 +23,9 @@ descr => 'standard POSIX collation', collname => 'POSIX', collprovider => 'c', collencoding => '-1', collcollate => 'POSIX', collctype => 'POSIX' }, -{ oid => '962', descr => 'sorts by Unicode code point', - collname => 'ucs_basic', collprovider => 'c', collencoding => '6', - collcollate => 'C', collctype => 'C' }, +{ oid => '962', descr => 'sorts by Unicode code point, C character semantics', + collname => 'ucs_basic', collprovider => 'b', collencoding => '6', + colllocale => 'C' }, { oid => '963', descr => 'sorts using the Unicode Collation Algorithm with default settings', collname => 'unicode', collprovider => 'i', collencoding => '-1', diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h index a3e196fb53..5ce289d74b 100644 --- a/src/include/catalog/pg_collation.h +++ b/src/include/catalog/pg_collation.h @@ -68,6 +68,7 @@ MAKE_SYSCACHE(COLLOID, pg_collation_oid_index, 8); #ifdef EXPOSE_TO_CLIENT_CODE #define COLLPROVIDER_DEFAULT 'd' +#define COLLPROVIDER_BUILTIN 'b' #define COLLPROVIDER_ICU 'i' #define COLLPROVIDER_LIBC 'c' @@ -76,6 +77,8 @@ collprovider_name(char c) { switch (c) { + case COLLPROVIDER_BUILTIN: + return "builtin"; case COLLPROVIDER_ICU: return "icu"; case COLLPROVIDER_LIBC: diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 28c925b5af..3d949d5112 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -76,6 +76,10 @@ struct pg_locale_struct bool deterministic; union { + struct + { + const char *locale; + } builtin; locale_t lt; #ifdef USE_ICU struct @@ -113,6 +117,7 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, size_t srclen, pg_locale_t locale); +extern const char *builtin_validate_locale(int encoding, const char *loc_str); extern void icu_validate_locale(const char *loc_str); extern char *icu_language_tag(const char *loc_str, int elevel); diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl index 8a1fc12ec6..5f8ef16803 100644 --- a/src/test/icu/t/010_database.pl +++ b/src/test/icu/t/010_database.pl @@ -27,9 +27,8 @@ CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE upp INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B'); }); -is( $node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}), - qq(t), - 'ICU unicode version defined'); +is($node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}), + qq(t), 'ICU unicode version defined'); is( $node1->safe_psql('dbicu', q{SELECT def FROM icu ORDER BY def}), qq(A @@ -63,14 +62,13 @@ is( $node1->psql( 0, "C locale works for ICU"); -# Test that LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE -# are specified -is( $node1->psql( - 'postgres', - q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary' - LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8} - ), - 0, - "LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified"); +my ($ret, $stdout, $stderr) = $node1->psql('postgres', + q{CREATE DATABASE dbicu LOCALE_PROVIDER builtin LOCALE 'C' TEMPLATE dbicu} +); +isnt($ret, 0, "locale provider must match template: exit code not 0"); +like( + $stderr, + qr/ERROR: new locale provider \(builtin\) does not match locale provider of the template database \(icu\)/, + "locale provider must match template: error message"); done_testing(); diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out index 0649564485..593a622637 100644 --- a/src/test/regress/expected/collate.out +++ b/src/test/regress/expected/collate.out @@ -650,6 +650,22 @@ EXPLAIN (COSTS OFF) (3 rows) -- CREATE/DROP COLLATION +CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" ); +SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c; + b +----- + ABD + Abc + abc + bbc +(4 rows) + +CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails +ERROR: parameter "locale" must be specified +CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails +ERROR: invalid locale name "en_US" for builtin provider +CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails +ERROR: parameter "locale" must be specified CREATE COLLATION mycoll1 FROM "C"; CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" ); CREATE COLLATION mycoll3 FROM "default"; -- intentionally unsupported @@ -754,7 +770,7 @@ DETAIL: FROM cannot be specified together with any other options. -- must get rid of them. -- DROP SCHEMA collate_tests CASCADE; -NOTICE: drop cascades to 19 other objects +NOTICE: drop cascades to 20 other objects DETAIL: drop cascades to table collate_test1 drop cascades to table collate_test_like drop cascades to table collate_test2 @@ -771,6 +787,7 @@ drop cascades to function dup(anyelement) drop cascades to table collate_test20 drop cascades to table collate_test21 drop cascades to table collate_test22 +drop cascades to collation builtin_c drop cascades to collation mycoll2 drop cascades to table collate_test23 drop cascades to view collate_on_int diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql index c3d40fc195..4b0e4472c3 100644 --- a/src/test/regress/sql/collate.sql +++ b/src/test/regress/sql/collate.sql @@ -244,6 +244,14 @@ EXPLAIN (COSTS OFF) -- CREATE/DROP COLLATION +CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" ); + +SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c; + +CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails +CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails +CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails + CREATE COLLATION mycoll1 FROM "C"; CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" ); CREATE COLLATION mycoll3 FROM "default"; -- intentionally unsupported