diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e073f7b57c..36319222e6 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -19711,9 +19711,9 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); pg_import_system_collations - pg_import_system_collations(if_not_exists boolean, schema regnamespace) + pg_import_system_collations(schema regnamespace) - void + integer Import operating system collations @@ -19730,18 +19730,20 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); - pg_import_system_collations populates the system - catalog pg_collation with collations based on all the - locales it finds on the operating system. This is + pg_import_system_collations adds collations to the system + catalog pg_collation based on all the + locales it finds in the operating system. This is what initdb uses; see for more details. If additional locales are installed into the operating system later on, this function - can be run again to add collations for the new locales. In that case, the - parameter if_not_exists should be set to true to - skip over existing collations. The schema - parameter would typically be pg_catalog, but that is - not a requirement. (Collation objects based on locales that are no longer - present on the operating system are never removed by this function.) + can be run again to add collations for the new locales. Locales that + match existing entries in pg_collation will be skipped. + (But collation objects based on locales that are no longer + present in the operating system are not removed by this function.) + The schema parameter would typically + be pg_catalog, but that is not a requirement; + the collations could be installed into some other schema as well. + The function returns the number of new collation objects it created. diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c index 6b1b72f245..ca62896ecb 100644 --- a/src/backend/catalog/pg_collation.c +++ b/src/backend/catalog/pg_collation.c @@ -37,6 +37,11 @@ * CollationCreate * * Add a new tuple to pg_collation. + * + * if_not_exists: if true, don't fail on duplicate name, just print a notice + * and return InvalidOid. + * quiet: if true, don't fail on duplicate name, just silently return + * InvalidOid (overrides if_not_exists). */ Oid CollationCreate(const char *collname, Oid collnamespace, @@ -45,7 +50,8 @@ CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, - bool if_not_exists) + bool if_not_exists, + bool quiet) { Relation rel; TupleDesc tupDesc; @@ -77,7 +83,9 @@ CollationCreate(const char *collname, Oid collnamespace, Int32GetDatum(collencoding), ObjectIdGetDatum(collnamespace))) { - if (if_not_exists) + if (quiet) + return InvalidOid; + else if (if_not_exists) { ereport(NOTICE, (errcode(ERRCODE_DUPLICATE_OBJECT), @@ -119,7 +127,12 @@ CollationCreate(const char *collname, Oid collnamespace, Int32GetDatum(-1), ObjectIdGetDatum(collnamespace)))) { - if (if_not_exists) + if (quiet) + { + heap_close(rel, NoLock); + return InvalidOid; + } + else if (if_not_exists) { heap_close(rel, NoLock); ereport(NOTICE, diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 1a62b3e30a..a0b3b23816 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -37,6 +37,14 @@ #include "utils/syscache.h" +typedef struct +{ + char *localename; /* name of locale, as per "locale -a" */ + char *alias; /* shortened alias for same */ + int enc; /* encoding */ +} CollAliasData; + + /* * CREATE COLLATION */ @@ -196,7 +204,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e collcollate, collctype, collversion, - if_not_exists); + if_not_exists, + false); /* not quiet */ if (!OidIsValid(newoid)) return InvalidObjectAddress; @@ -344,13 +353,18 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) } +/* will we use "locale -a" in pg_import_system_collations? */ +#if defined(HAVE_LOCALE_T) && !defined(WIN32) +#define READ_LOCALE_A_OUTPUT +#endif + +#ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" * -> "br_FR@euro"). Return true if a new, different name was * generated. */ -pg_attribute_unused() static bool normalize_libc_locale_name(char *new, const char *old) { @@ -379,6 +393,20 @@ normalize_libc_locale_name(char *new, const char *old) return changed; } +/* + * qsort comparator for CollAliasData items + */ +static int +cmpaliases(const void *a, const void *b) +{ + const CollAliasData *ca = (const CollAliasData *) a; + const CollAliasData *cb = (const CollAliasData *) b; + + /* comparing localename is enough because other fields are derived */ + return strcmp(ca->localename, cb->localename); +} +#endif /* READ_LOCALE_A_OUTPUT */ + #ifdef USE_ICU /* @@ -429,140 +457,190 @@ get_icu_locale_comment(const char *localename) #endif /* USE_ICU */ +/* + * pg_import_system_collations: add known system collations to pg_collation + */ Datum pg_import_system_collations(PG_FUNCTION_ARGS) { - bool if_not_exists = PG_GETARG_BOOL(0); - Oid nspid = PG_GETARG_OID(1); + Oid nspid = PG_GETARG_OID(0); + int ncreated = 0; -#if defined(HAVE_LOCALE_T) && !defined(WIN32) - FILE *locale_a_handle; - char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ - int count = 0; - List *aliaslist = NIL; - List *localelist = NIL; - List *enclist = NIL; - ListCell *lca, - *lcl, - *lce; -#endif + /* silence compiler warning if we have no locale implementation at all */ + (void) nspid; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to import system collations")))); -#if !(defined(HAVE_LOCALE_T) && !defined(WIN32)) && !defined(USE_ICU) - /* silence compiler warnings */ - (void) if_not_exists; - (void) nspid; -#endif - -#if defined(HAVE_LOCALE_T) && !defined(WIN32) - locale_a_handle = OpenPipeStream("locale -a", "r"); - if (locale_a_handle == NULL) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not execute command \"%s\": %m", - "locale -a"))); - - while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + /* Load collations known to libc, using "locale -a" to enumerate them */ +#ifdef READ_LOCALE_A_OUTPUT { - int i; - size_t len; - int enc; - bool skip; - char alias[NAMEDATALEN]; + FILE *locale_a_handle; + char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ + int nvalid = 0; + Oid collid; + CollAliasData *aliases; + int naliases, + maxaliases, + i; - len = strlen(localebuf); + /* expansible array of aliases */ + maxaliases = 100; + aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); + naliases = 0; - if (len == 0 || localebuf[len - 1] != '\n') + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) { - elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); - continue; - } - localebuf[len - 1] = '\0'; + size_t len; + int enc; + bool skip; + char alias[NAMEDATALEN]; - /* - * Some systems have locale names that don't consist entirely of ASCII - * letters (such as "bokmål" or "français"). This is - * pretty silly, since we need the locale itself to interpret the - * non-ASCII characters. We can't do much with those, so we filter - * them out. - */ - skip = false; - for (i = 0; i < len; i++) - { - if (IS_HIGHBIT_SET(localebuf[i])) + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') { - skip = true; - break; + elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + /* + * Some systems have locale names that don't consist entirely of + * ASCII letters (such as "bokmål" or "français"). + * This is pretty silly, since we need the locale itself to + * interpret the non-ASCII characters. We can't do much with + * those, so we filter them out. + */ + skip = false; + for (i = 0; i < len; i++) + { + if (IS_HIGHBIT_SET(localebuf[i])) + { + skip = true; + break; + } + } + if (skip) + { + elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); + continue; + } + + enc = pg_get_encoding_from_locale(localebuf, false); + if (enc < 0) + { + /* error message printed by pg_get_encoding_from_locale() */ + continue; + } + if (!PG_VALID_BE_ENCODING(enc)) + continue; /* ignore locales for client-only encodings */ + if (enc == PG_SQL_ASCII) + continue; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + nvalid++; + + /* + * Create a collation named the same as the locale, but quietly + * doing nothing if it already exists. This is the behavior we + * need even at initdb time, because some versions of "locale -a" + * can report the same locale name more than once. And it's + * convenient for later import runs, too, since you just about + * always want to add on new locales without a lot of chatter + * about existing ones. + */ + collid = CollationCreate(localebuf, nspid, GetUserId(), + COLLPROVIDER_LIBC, enc, + localebuf, localebuf, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" + * for ease of use. Note that collation names are unique per + * encoding only, so this doesn't clash with "en_US" for LATIN1, + * say. + * + * However, it might conflict with a name we'll see later in the + * "locale -a" output. So save up the aliases and try to add them + * after we've read all the output. + */ + if (normalize_libc_locale_name(alias, localebuf)) + { + if (naliases >= maxaliases) + { + maxaliases *= 2; + aliases = (CollAliasData *) + repalloc(aliases, maxaliases * sizeof(CollAliasData)); + } + aliases[naliases].localename = pstrdup(localebuf); + aliases[naliases].alias = pstrdup(alias); + aliases[naliases].enc = enc; + naliases++; } } - if (skip) - { - elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); - continue; - } - enc = pg_get_encoding_from_locale(localebuf, false); - if (enc < 0) - { - /* error message printed by pg_get_encoding_from_locale() */ - continue; - } - if (!PG_VALID_BE_ENCODING(enc)) - continue; /* ignore locales for client-only encodings */ - if (enc == PG_SQL_ASCII) - continue; /* C/POSIX are already in the catalog */ - - count++; - - CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, - localebuf, localebuf, - get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), - if_not_exists); - - CommandCounterIncrement(); + ClosePipeStream(locale_a_handle); /* - * Generate aliases such as "en_US" in addition to "en_US.utf8" for - * ease of use. Note that collation names are unique per encoding - * only, so this doesn't clash with "en_US" for LATIN1, say. - * - * However, it might conflict with a name we'll see later in the - * "locale -a" output. So save up the aliases and try to add them - * after we've read all the output. + * Before processing the aliases, sort them by locale name. The point + * here is that if "locale -a" gives us multiple locale names with the + * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we + * want to pick a deterministic one of them. First in ASCII sort + * order is a good enough rule. (Before PG 10, the code corresponding + * to this logic in initdb.c had an additional ordering rule, to + * prefer the locale name exactly matching the alias, if any. We + * don't need to consider that here, because we would have already + * created such a pg_collation entry above, and that one will win.) */ - if (normalize_libc_locale_name(alias, localebuf)) + if (naliases > 1) + qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases); + + /* Now add aliases, ignoring any that match pre-existing entries */ + for (i = 0; i < naliases; i++) { - aliaslist = lappend(aliaslist, pstrdup(alias)); - localelist = lappend(localelist, pstrdup(localebuf)); - enclist = lappend_int(enclist, enc); + char *locale = aliases[i].localename; + char *alias = aliases[i].alias; + int enc = aliases[i].enc; + + collid = CollationCreate(alias, nspid, GetUserId(), + COLLPROVIDER_LIBC, enc, + locale, locale, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + } } + + /* Give a warning if "locale -a" seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); } +#endif /* READ_LOCALE_A_OUTPUT */ - ClosePipeStream(locale_a_handle); - - /* Now try to add any aliases we created */ - forthree(lca, aliaslist, lcl, localelist, lce, enclist) - { - char *alias = (char *) lfirst(lca); - char *locale = (char *) lfirst(lcl); - int enc = lfirst_int(lce); - - CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, - locale, locale, - get_collation_actual_version(COLLPROVIDER_LIBC, locale), - true); - CommandCounterIncrement(); - } - - if (count == 0) - ereport(WARNING, - (errmsg("no usable system locales were found"))); -#endif /* not HAVE_LOCALE_T && not WIN32 */ - + /* Load collations known to ICU */ #ifdef USE_ICU if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) { @@ -597,13 +675,20 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(name); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; collid = CollationCreate(psprintf("%s-x-icu", langtag), - nspid, GetUserId(), COLLPROVIDER_ICU, -1, + nspid, GetUserId(), + COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), - if_not_exists); + true, true); + if (OidIsValid(collid)) + { + ncreated++; - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(name)); + CommandCounterIncrement(); + + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(name)); + } /* * Add keyword variants @@ -624,12 +709,20 @@ pg_import_system_collations(PG_FUNCTION_ARGS) langtag = get_icu_language_tag(localeid); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid; collid = CollationCreate(psprintf("%s-x-icu", langtag), - nspid, GetUserId(), COLLPROVIDER_ICU, -1, + nspid, GetUserId(), + COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), - if_not_exists); - CreateComments(collid, CollationRelationId, 0, - get_icu_locale_comment(localeid)); + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + + CreateComments(collid, CollationRelationId, 0, + get_icu_locale_comment(localeid)); + } } if (U_FAILURE(status)) ereport(ERROR, @@ -638,7 +731,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) uenum_close(en); } } -#endif +#endif /* USE_ICU */ - PG_RETURN_VOID(); + PG_RETURN_INT32(ncreated); } diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index b76eb1eae4..7303bbe892 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1636,10 +1636,16 @@ setup_description(FILE *cmdfd) static void setup_collation(FILE *cmdfd) { - PG_CMD_PUTS("SELECT pg_import_system_collations(if_not_exists => false, schema => 'pg_catalog');\n\n"); + /* + * Add an SQL-standard name. We don't want to pin this, so it doesn't go + * in pg_collation.h. But add it before reading system collations, so + * that it wins if libc defines a locale named ucs_basic. + */ + PG_CMD_PRINTF3("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", + BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); - /* Add an SQL-standard name */ - PG_CMD_PRINTF3("INSERT INTO pg_collation (collname, collnamespace, collowner, collprovider, collencoding, collcollate, collctype) VALUES ('ucs_basic', 'pg_catalog'::regnamespace, %u, '%c', %d, 'C', 'C');\n\n", BOOTSTRAP_SUPERUSERID, COLLPROVIDER_LIBC, PG_UTF8); + /* Now import all collations we can find in the operating system */ + PG_CMD_PUTS("SELECT pg_import_system_collations('pg_catalog');\n\n"); } /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f4d2770e7e..14f916cde1 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201706202 +#define CATALOG_VERSION_NO 201706231 #endif diff --git a/src/include/catalog/pg_collation_fn.h b/src/include/catalog/pg_collation_fn.h index 26b8f0d3d1..0ef31389d5 100644 --- a/src/include/catalog/pg_collation_fn.h +++ b/src/include/catalog/pg_collation_fn.h @@ -20,7 +20,8 @@ extern Oid CollationCreate(const char *collname, Oid collnamespace, int32 collencoding, const char *collcollate, const char *collctype, const char *collversion, - bool if_not_exists); + bool if_not_exists, + bool quiet); extern void RemoveCollationById(Oid collationOid); #endif /* PG_COLLATION_FN_H */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 1191b4ab1b..8b33b4e0ea 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -5462,11 +5462,12 @@ DESCR("pg_controldata recovery state information as a function"); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DESCR("pg_controldata init state information as a function"); -DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 2 0 2278 "16 4089" _null_ _null_ "{if_not_exists,schema}" _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); +/* collation management functions */ +DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f f t f v r 1 0 23 "4089" _null_ _null_ _null_ _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); DESCR("import collations from operating system"); DATA(insert OID = 3448 ( pg_collation_actual_version PGNSP PGUID 12 100 0 0 0 f f f f t f v s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_collation_actual_version _null_ _null_ _null_ )); -DESCR("import collations from operating system"); +DESCR("get actual version of collation from operating system"); /* system management/monitoring related functions */ DATA(insert OID = 3353 ( pg_ls_logdir PGNSP PGUID 12 10 20 0 0 f f f f t t v s 0 0 2249 "" "{25,20,1184}" "{o,o,o}" "{name,size,modification}" _null_ _null_ pg_ls_logdir _null_ _null_ _null_ ));