postgresql/src/backend/commands/collationcmds.c

738 lines
20 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* collationcmds.c
* collation-related commands support code
*
2017-01-03 19:48:53 +01:00
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/commands/collationcmds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_collation_fn.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
typedef struct
{
char *localename; /* name of locale, as per "locale -a" */
char *alias; /* shortened alias for same */
int enc; /* encoding */
} CollAliasData;
/*
* CREATE COLLATION
*/
ObjectAddress
DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
{
char *collName;
Oid collNamespace;
AclResult aclresult;
ListCell *pl;
2011-04-10 17:42:00 +02:00
DefElem *fromEl = NULL;
DefElem *localeEl = NULL;
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
char *collproviderstr = NULL;
int collencoding;
char collprovider = 0;
char *collversion = NULL;
Oid newoid;
ObjectAddress address;
collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
get_namespace_name(collNamespace));
foreach(pl, parameters)
{
DefElem *defel = lfirst_node(DefElem, pl);
DefElem **defelp;
if (pg_strcasecmp(defel->defname, "from") == 0)
defelp = &fromEl;
else if (pg_strcasecmp(defel->defname, "locale") == 0)
defelp = &localeEl;
else if (pg_strcasecmp(defel->defname, "lc_collate") == 0)
defelp = &lccollateEl;
else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
defelp = &lcctypeEl;
else if (pg_strcasecmp(defel->defname, "provider") == 0)
defelp = &providerEl;
else if (pg_strcasecmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("collation attribute \"%s\" not recognized",
defel->defname),
parser_errposition(pstate, defel->location)));
break;
}
*defelp = defel;
}
if ((localeEl && (lccollateEl || lcctypeEl))
|| (fromEl && list_length(parameters) != 1))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
if (fromEl)
{
Oid collid;
HeapTuple tp;
2011-04-10 17:42:00 +02:00
collid = get_collation_oid(defGetQualifiedName(fromEl), false);
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for collation %u", collid);
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
/*
* Copying the "default" collation is not allowed because most code
* checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
* and so having a second collation with COLLPROVIDER_DEFAULT would
* not work and potentially confuse or crash some code. This could be
* fixed with some legwork.
*/
if (collprovider == COLLPROVIDER_DEFAULT)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("collation \"default\" cannot be copied")));
}
if (localeEl)
{
collcollate = defGetString(localeEl);
collctype = defGetString(localeEl);
}
if (lccollateEl)
collcollate = defGetString(lccollateEl);
if (lcctypeEl)
collctype = defGetString(lcctypeEl);
if (providerEl)
collproviderstr = defGetString(providerEl);
if (versionEl)
collversion = defGetString(versionEl);
if (collproviderstr)
{
if (pg_strcasecmp(collproviderstr, "icu") == 0)
collprovider = COLLPROVIDER_ICU;
else if (pg_strcasecmp(collproviderstr, "libc") == 0)
collprovider = COLLPROVIDER_LIBC;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("unrecognized collation provider: %s",
collproviderstr)));
}
else if (!fromEl)
collprovider = COLLPROVIDER_LIBC;
if (!collcollate)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_collate\" must be specified")));
if (!collctype)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
if (collprovider == COLLPROVIDER_ICU)
collencoding = -1;
else
{
collencoding = GetDatabaseEncoding();
check_encoding_locale_matches(collencoding, collcollate, collctype);
}
if (!collversion)
collversion = get_collation_actual_version(collprovider, collcollate);
newoid = CollationCreate(collName,
collNamespace,
GetUserId(),
collprovider,
collencoding,
collcollate,
collctype,
collversion,
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
if_not_exists,
false); /* not quiet */
if (!OidIsValid(newoid))
return InvalidObjectAddress;
ObjectAddressSet(address, CollationRelationId, newoid);
/* check that the locales can be loaded */
CommandCounterIncrement();
(void) pg_newlocale_from_collation(newoid);
return address;
}
/*
* Subroutine for ALTER COLLATION SET SCHEMA and RENAME
*
* Is there a collation with the same name of the given collation already in
* the given namespace? If so, raise an appropriate error message.
*/
void
IsThereCollationInNamespace(const char *collname, Oid nspOid)
{
/* make sure the name doesn't already exist in new schema */
if (SearchSysCacheExists3(COLLNAMEENCNSP,
CStringGetDatum(collname),
Int32GetDatum(GetDatabaseEncoding()),
ObjectIdGetDatum(nspOid)))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
collname, GetDatabaseEncodingName(),
get_namespace_name(nspOid))));
/* mustn't match an any-encoding entry, either */
if (SearchSysCacheExists3(COLLNAMEENCNSP,
CStringGetDatum(collname),
Int32GetDatum(-1),
ObjectIdGetDatum(nspOid)))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_OBJECT),
errmsg("collation \"%s\" already exists in schema \"%s\"",
collname, get_namespace_name(nspOid))));
}
/*
* ALTER COLLATION
*/
ObjectAddress
AlterCollation(AlterCollationStmt *stmt)
{
Relation rel;
Oid collOid;
HeapTuple tup;
Form_pg_collation collForm;
Datum collversion;
bool isnull;
char *oldversion;
char *newversion;
ObjectAddress address;
rel = heap_open(CollationRelationId, RowExclusiveLock);
collOid = get_collation_oid(stmt->collname, false);
if (!pg_collation_ownercheck(collOid, GetUserId()))
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
NameListToString(stmt->collname));
tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for collation %u", collOid);
collForm = (Form_pg_collation) GETSTRUCT(tup);
collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
&isnull);
oldversion = isnull ? NULL : TextDatumGetCString(collversion);
newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
/* cannot change from NULL to non-NULL or vice versa */
if ((!oldversion && newversion) || (oldversion && !newversion))
elog(ERROR, "invalid collation version change");
else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
{
bool nulls[Natts_pg_collation];
bool replaces[Natts_pg_collation];
Datum values[Natts_pg_collation];
ereport(NOTICE,
(errmsg("changing version from %s to %s",
oldversion, newversion)));
memset(values, 0, sizeof(values));
memset(nulls, false, sizeof(nulls));
memset(replaces, false, sizeof(replaces));
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
replaces[Anum_pg_collation_collversion - 1] = true;
tup = heap_modify_tuple(tup, RelationGetDescr(rel),
values, nulls, replaces);
}
else
ereport(NOTICE,
(errmsg("version has not changed")));
CatalogTupleUpdate(rel, &tup->t_self, tup);
InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
ObjectAddressSet(address, CollationRelationId, collOid);
heap_freetuple(tup);
heap_close(rel, NoLock);
return address;
}
Datum
pg_collation_actual_version(PG_FUNCTION_ARGS)
{
Oid collid = PG_GETARG_OID(0);
HeapTuple tp;
char *collcollate;
char collprovider;
char *version;
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
if (!HeapTupleIsValid(tp))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("collation with OID %u does not exist", collid)));
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
version = get_collation_actual_version(collprovider, collcollate);
if (version)
PG_RETURN_TEXT_P(cstring_to_text(version));
else
PG_RETURN_NULL();
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* will we use "locale -a" in pg_import_system_collations? */
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
#define READ_LOCALE_A_OUTPUT
#endif
#ifdef READ_LOCALE_A_OUTPUT
/*
* "Normalize" a libc locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
static bool
normalize_libc_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
bool changed = false;
while (*o)
{
if (*o == '.')
{
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
o++;
while ((*o >= 'A' && *o <= 'Z')
|| (*o >= 'a' && *o <= 'z')
|| (*o >= '0' && *o <= '9')
|| (*o == '-'))
o++;
changed = true;
}
else
*n++ = *o++;
}
*n = '\0';
return changed;
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/*
* qsort comparator for CollAliasData items
*/
static int
cmpaliases(const void *a, const void *b)
{
const CollAliasData *ca = (const CollAliasData *) a;
const CollAliasData *cb = (const CollAliasData *) b;
/* comparing localename is enough because other fields are derived */
return strcmp(ca->localename, cb->localename);
}
#endif /* READ_LOCALE_A_OUTPUT */
#ifdef USE_ICU
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
/*
* Get the ICU language tag for a locale name.
* The result is a palloc'd string.
*/
static char *
get_icu_language_tag(const char *localename)
{
char buf[ULOC_FULLNAME_CAPACITY];
UErrorCode status;
status = U_ZERO_ERROR;
uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not convert locale name \"%s\" to language tag: %s",
localename, u_errorName(status))));
return pstrdup(buf);
}
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
/*
* Get a comment (specifically, the display name) for an ICU locale.
* The result is a palloc'd string.
*/
static char *
get_icu_locale_comment(const char *localename)
{
UErrorCode status;
UChar displayname[128];
int32 len_uchar;
char *result;
status = U_ZERO_ERROR;
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
len_uchar = uloc_getDisplayName(localename, "en",
displayname, lengthof(displayname),
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
&status);
if (U_FAILURE(status))
ereport(ERROR,
Fix memory leakage in ICU encoding conversion, and other code review. Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue. Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency. That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now. Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
2017-06-23 18:22:06 +02:00
(errmsg("could not get display name for locale \"%s\": %s",
localename, u_errorName(status))));
icu_from_uchar(&result, displayname, len_uchar);
return result;
}
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
#endif /* USE_ICU */
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/*
* pg_import_system_collations: add known system collations to pg_collation
*/
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
Oid nspid = PG_GETARG_OID(0);
int ncreated = 0;
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* silence compiler warning if we have no locale implementation at all */
(void) nspid;
if (!superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
(errmsg("must be superuser to import system collations"))));
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* Load collations known to libc, using "locale -a" to enumerate them */
#ifdef READ_LOCALE_A_OUTPUT
{
FILE *locale_a_handle;
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
int nvalid = 0;
Oid collid;
CollAliasData *aliases;
int naliases,
maxaliases,
i;
/* expansible array of aliases */
maxaliases = 100;
aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
naliases = 0;
locale_a_handle = OpenPipeStream("locale -a", "r");
if (locale_a_handle == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not execute command \"%s\": %m",
"locale -a")));
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
{
size_t len;
int enc;
bool skip;
char alias[NAMEDATALEN];
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
len = strlen(localebuf);
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
if (len == 0 || localebuf[len - 1] != '\n')
{
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
continue;
}
localebuf[len - 1] = '\0';
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/*
* Some systems have locale names that don't consist entirely of
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
* This is pretty silly, since we need the locale itself to
* interpret the non-ASCII characters. We can't do much with
* those, so we filter them out.
*/
skip = false;
for (i = 0; i < len; i++)
{
if (IS_HIGHBIT_SET(localebuf[i]))
{
skip = true;
break;
}
}
if (skip)
{
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
continue;
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
enc = pg_get_encoding_from_locale(localebuf, false);
if (enc < 0)
{
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* error message printed by pg_get_encoding_from_locale() */
continue;
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
if (!PG_VALID_BE_ENCODING(enc))
continue; /* ignore locales for client-only encodings */
if (enc == PG_SQL_ASCII)
continue; /* C/POSIX are already in the catalog */
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* count valid locales found in operating system */
nvalid++;
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/*
* Create a collation named the same as the locale, but quietly
* doing nothing if it already exists. This is the behavior we
* need even at initdb time, because some versions of "locale -a"
* can report the same locale name more than once. And it's
* convenient for later import runs, too, since you just about
* always want to add on new locales without a lot of chatter
* about existing ones.
*/
collid = CollationCreate(localebuf, nspid, GetUserId(),
COLLPROVIDER_LIBC, enc,
localebuf, localebuf,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
if (OidIsValid(collid))
{
ncreated++;
/* Must do CCI between inserts to handle duplicates correctly */
CommandCounterIncrement();
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/*
* Generate aliases such as "en_US" in addition to "en_US.utf8"
* for ease of use. Note that collation names are unique per
* encoding only, so this doesn't clash with "en_US" for LATIN1,
* say.
*
* However, it might conflict with a name we'll see later in the
* "locale -a" output. So save up the aliases and try to add them
* after we've read all the output.
*/
if (normalize_libc_locale_name(alias, localebuf))
{
if (naliases >= maxaliases)
{
maxaliases *= 2;
aliases = (CollAliasData *)
repalloc(aliases, maxaliases * sizeof(CollAliasData));
}
aliases[naliases].localename = pstrdup(localebuf);
aliases[naliases].alias = pstrdup(alias);
aliases[naliases].enc = enc;
naliases++;
}
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
ClosePipeStream(locale_a_handle);
/*
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
* Before processing the aliases, sort them by locale name. The point
* here is that if "locale -a" gives us multiple locale names with the
* same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
* want to pick a deterministic one of them. First in ASCII sort
* order is a good enough rule. (Before PG 10, the code corresponding
* to this logic in initdb.c had an additional ordering rule, to
* prefer the locale name exactly matching the alias, if any. We
* don't need to consider that here, because we would have already
* created such a pg_collation entry above, and that one will win.)
*/
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
if (naliases > 1)
qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases);
/* Now add aliases, ignoring any that match pre-existing entries */
for (i = 0; i < naliases; i++)
{
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
char *locale = aliases[i].localename;
char *alias = aliases[i].alias;
int enc = aliases[i].enc;
collid = CollationCreate(alias, nspid, GetUserId(),
COLLPROVIDER_LIBC, enc,
locale, locale,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
if (OidIsValid(collid))
{
ncreated++;
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
CommandCounterIncrement();
}
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* Give a warning if "locale -a" seems to be malfunctioning */
if (nvalid == 0)
ereport(WARNING,
(errmsg("no usable system locales were found")));
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
#endif /* READ_LOCALE_A_OUTPUT */
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
/* Load collations known to ICU */
#ifdef USE_ICU
if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
{
ereport(NOTICE,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("encoding \"%s\" not supported by ICU",
pg_encoding_to_char(GetDatabaseEncoding()))));
}
else
{
int i;
/*
* Start the loop at -1 to sneak in the root locale without too much
* code duplication.
*/
for (i = -1; i < ucol_countAvailable(); i++)
{
const char *name;
char *langtag;
const char *collcollate;
UEnumeration *en;
UErrorCode status;
const char *val;
Oid collid;
if (i == -1)
name = ""; /* ICU root locale */
else
name = ucol_getAvailable(i);
langtag = get_icu_language_tag(name);
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
nspid, GetUserId(),
COLLPROVIDER_ICU, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
true, true);
if (OidIsValid(collid))
{
ncreated++;
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
CommandCounterIncrement();
CreateComments(collid, CollationRelationId, 0,
get_icu_locale_comment(name));
}
/*
* Add keyword variants
*/
status = U_ZERO_ERROR;
en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
status = U_ZERO_ERROR;
uenum_reset(en, &status);
while ((val = uenum_next(en, NULL, &status)))
{
char *localeid = psprintf("%s@collation=%s", name, val);
langtag = get_icu_language_tag(localeid);
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
collid = CollationCreate(psprintf("%s-x-icu", langtag),
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
nspid, GetUserId(),
COLLPROVIDER_ICU, -1,
collcollate, collcollate,
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
true, true);
if (OidIsValid(collid))
{
ncreated++;
CommandCounterIncrement();
CreateComments(collid, CollationRelationId, 0,
get_icu_locale_comment(localeid));
}
}
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not get keyword values for locale \"%s\": %s",
name, u_errorName(status))));
uenum_close(en);
}
}
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
#endif /* USE_ICU */
Rethink behavior of pg_import_system_collations(). Marco Atzeri reported that initdb would fail if "locale -a" reported the same locale name more than once. All previous versions of Postgres implicitly de-duplicated the results of "locale -a", but the rewrite to move the collation import logic into C had lost that property. It had also lost the property that locale names matching built-in collation names were silently ignored. The simplest way to fix this is to make initdb run the function in if-not-exists mode, which means that there's no real use-case for non if-not-exists mode; we might as well just drop the boolean argument and simplify the function's definition to be "add any collations not already known". This change also gets rid of some odd corner cases caused by the fact that aliases were added in if-not-exists mode even if the function argument said otherwise. While at it, adjust the behavior so that pg_import_system_collations() doesn't spew "collation foo already exists, skipping" messages during a re-run; that's completely unhelpful, especially since there are often hundreds of them. And make it return a count of the number of collations it did add, which seems like it might be helpful. Also, re-integrate the previous coding's property that it would make a deterministic selection of which alias to use if there were conflicting possibilities. This would only come into play if "locale -a" reports multiple equivalent locale names, say "de_DE.utf8" and "de_DE.UTF-8", but that hardly seems out of the question. In passing, fix incorrect behavior in pg_import_system_collations()'s ICU code path: it neglected CommandCounterIncrement, which would result in failures if ICU returns duplicate names, and it would try to create comments even if a new collation hadn't been created. Also, reorder operations in initdb so that the 'ucs_basic' collation is created before calling pg_import_system_collations() not after. This prevents a failure if "locale -a" were to report a locale named that. There's no reason to think that that ever happens in the wild, but the old coding would have survived it, so let's be equally robust. Discussion: https://postgr.es/m/20c74bc3-d6ca-243d-1bbc-12f17fa4fe9a@gmail.com
2017-06-23 20:19:48 +02:00
PG_RETURN_INT32(ncreated);
}