/*-------------------------------------------------------------------------
 *
 * collationcmds.c
 *	  collation-related commands support code
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/commands/collationcmds.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_collation_fn.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/syscache.h"


/*
 * One pending alias collected while importing libc locales: the full locale
 * name, the shortened name to register it under, and its encoding.
 */
typedef struct
{
	char	   *localename;		/* name of locale, as per "locale -a" */
	char	   *alias;			/* shortened alias for same */
	int			enc;			/* encoding */
} CollAliasData;


/*
 * CREATE COLLATION
 *
 * pstate is used only to report the position of an unrecognized option.
 * names is the (possibly qualified) name of the new collation, parameters
 * is a list of DefElem options, and if_not_exists is handed through to
 * CollationCreate().
 *
 * Recognized options (matched case-insensitively) are "from", "locale",
 * "lc_collate", "lc_ctype", "provider" and "version".  "locale" may not be
 * combined with "lc_collate"/"lc_ctype", and "from" must be the only option.
 *
 * Returns the address of the new collation, or InvalidObjectAddress when
 * CollationCreate() hands back an invalid OID.
 */
ObjectAddress
DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
{
	/* Option nodes picked out of the parameter list; NULL when absent */
	DefElem    *fromEl = NULL;
	DefElem    *localeEl = NULL;
	DefElem    *lccollateEl = NULL;
	DefElem    *lcctypeEl = NULL;
	DefElem    *providerEl = NULL;
	DefElem    *versionEl = NULL;

	/* Properties of the collation under construction */
	char	   *collcollate = NULL;
	char	   *collctype = NULL;
	char	   *collversion = NULL;
	char	   *providerName = NULL;
	char		collprovider = 0;
	int			collencoding;

	char	   *newName;
	Oid			targetNsp;
	AclResult	nspAcl;
	ListCell   *cell;
	bool		conflict = false;
	Oid			createdOid;
	ObjectAddress address;

	/* Resolve the target schema and make sure we may create objects in it */
	targetNsp = QualifiedNameGetCreationNamespace(names, &newName);

	nspAcl = pg_namespace_aclcheck(targetNsp, GetUserId(), ACL_CREATE);
	if (nspAcl != ACLCHECK_OK)
		aclcheck_error(nspAcl, ACL_KIND_NAMESPACE,
					   get_namespace_name(targetNsp));

	/* Sort each option into its slot; a later duplicate overrides an earlier */
	foreach(cell, parameters)
	{
		DefElem    *opt = lfirst_node(DefElem, cell);
		const char *optname = opt->defname;
		DefElem   **slot;

		if (pg_strcasecmp(optname, "from") == 0)
			slot = &fromEl;
		else if (pg_strcasecmp(optname, "locale") == 0)
			slot = &localeEl;
		else if (pg_strcasecmp(optname, "lc_collate") == 0)
			slot = &lccollateEl;
		else if (pg_strcasecmp(optname, "lc_ctype") == 0)
			slot = &lcctypeEl;
		else if (pg_strcasecmp(optname, "provider") == 0)
			slot = &providerEl;
		else if (pg_strcasecmp(optname, "version") == 0)
			slot = &versionEl;
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("collation attribute \"%s\" not recognized",
							opt->defname),
					 parser_errposition(pstate, opt->location)));
			break;
		}

		*slot = opt;
	}

	/* Reject option combinations that contradict each other */
	if (localeEl != NULL && (lccollateEl != NULL || lcctypeEl != NULL))
		conflict = true;
	else if (fromEl != NULL && list_length(parameters) != 1)
		conflict = true;

	if (conflict)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("conflicting or redundant options")));

	/* FROM: start out with the properties of an existing collation */
	if (fromEl != NULL)
	{
		Oid			srcOid = get_collation_oid(defGetQualifiedName(fromEl), false);
		HeapTuple	srcTup = SearchSysCache1(COLLOID, ObjectIdGetDatum(srcOid));
		Form_pg_collation srcForm;

		if (!HeapTupleIsValid(srcTup))
			elog(ERROR, "cache lookup failed for collation %u", srcOid);

		/* Copy what we need before letting go of the cache entry */
		srcForm = (Form_pg_collation) GETSTRUCT(srcTup);
		collcollate = pstrdup(NameStr(srcForm->collcollate));
		collctype = pstrdup(NameStr(srcForm->collctype));
		collprovider = srcForm->collprovider;

		ReleaseSysCache(srcTup);

		/*
		 * Copying the "default" collation is not allowed because most code
		 * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
		 * and so having a second collation with COLLPROVIDER_DEFAULT would
		 * not work and potentially confuse or crash some code.  This could be
		 * fixed with some legwork.
		 */
		if (collprovider == COLLPROVIDER_DEFAULT)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
					 errmsg("collation \"default\" cannot be copied")));
	}

	/* Explicit options; lc_collate/lc_ctype are applied after locale */
	if (localeEl != NULL)
	{
		collcollate = defGetString(localeEl);
		collctype = defGetString(localeEl);
	}

	if (lccollateEl != NULL)
		collcollate = defGetString(lccollateEl);

	if (lcctypeEl != NULL)
		collctype = defGetString(lcctypeEl);

	if (providerEl != NULL)
		providerName = defGetString(providerEl);

	if (versionEl != NULL)
		collversion = defGetString(versionEl);

	/* Settle on a provider: explicit name, else libc unless copied via FROM */
	if (providerName == NULL)
	{
		if (fromEl == NULL)
			collprovider = COLLPROVIDER_LIBC;
	}
	else if (pg_strcasecmp(providerName, "icu") == 0)
		collprovider = COLLPROVIDER_ICU;
	else if (pg_strcasecmp(providerName, "libc") == 0)
		collprovider = COLLPROVIDER_LIBC;
	else
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("unrecognized collation provider: %s",
						providerName)));

	/* Both locale strings must be known by now */
	if (collcollate == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("parameter \"lc_collate\" must be specified")));

	if (collctype == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("parameter \"lc_ctype\" must be specified")));

	/*
	 * Non-ICU collations are tied to the database encoding and the locale
	 * names are checked against it; ICU collations are stored with
	 * encoding -1.
	 */
	if (collprovider != COLLPROVIDER_ICU)
	{
		collencoding = GetDatabaseEncoding();
		check_encoding_locale_matches(collencoding, collcollate, collctype);
	}
	else
		collencoding = -1;

	/* Without an explicit version, ask the provider for the current one */
	if (collversion == NULL)
		collversion = get_collation_actual_version(collprovider, collcollate);

	createdOid = CollationCreate(newName,
								 targetNsp,
								 GetUserId(),
								 collprovider,
								 collencoding,
								 collcollate,
								 collctype,
								 collversion,
								 if_not_exists,
								 false);	/* not quiet */

	if (!OidIsValid(createdOid))
		return InvalidObjectAddress;

	ObjectAddressSet(address, CollationRelationId, createdOid);

	/* check that the locales can be loaded */
	CommandCounterIncrement();
	(void) pg_newlocale_from_collation(createdOid);

	return address;
}

/*
 * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
 *
 * Is there a collation with the same name of the given collation already in
 * the given namespace?  If so, raise an appropriate error message.
*/ void IsThereCollationInNamespace(const char *collname, Oid nspOid) { /* make sure the name doesn't already exist in new schema */ if (SearchSysCacheExists3(COLLNAMEENCNSP, CStringGetDatum(collname), Int32GetDatum(GetDatabaseEncoding()), ObjectIdGetDatum(nspOid))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"", collname, GetDatabaseEncodingName(), get_namespace_name(nspOid)))); /* mustn't match an any-encoding entry, either */ if (SearchSysCacheExists3(COLLNAMEENCNSP, CStringGetDatum(collname), Int32GetDatum(-1), ObjectIdGetDatum(nspOid))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("collation \"%s\" already exists in schema \"%s\"", collname, get_namespace_name(nspOid)))); } /* * ALTER COLLATION */ ObjectAddress AlterCollation(AlterCollationStmt *stmt) { Relation rel; Oid collOid; HeapTuple tup; Form_pg_collation collForm; Datum collversion; bool isnull; char *oldversion; char *newversion; ObjectAddress address; rel = heap_open(CollationRelationId, RowExclusiveLock); collOid = get_collation_oid(stmt->collname, false); if (!pg_collation_ownercheck(collOid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION, NameListToString(stmt->collname)); tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); if (!HeapTupleIsValid(tup)) elog(ERROR, "cache lookup failed for collation %u", collOid); collForm = (Form_pg_collation) GETSTRUCT(tup); collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); oldversion = isnull ? 
NULL : TextDatumGetCString(collversion); newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate)); /* cannot change from NULL to non-NULL or vice versa */ if ((!oldversion && newversion) || (oldversion && !newversion)) elog(ERROR, "invalid collation version change"); else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) { bool nulls[Natts_pg_collation]; bool replaces[Natts_pg_collation]; Datum values[Natts_pg_collation]; ereport(NOTICE, (errmsg("changing version from %s to %s", oldversion, newversion))); memset(values, 0, sizeof(values)); memset(nulls, false, sizeof(nulls)); memset(replaces, false, sizeof(replaces)); values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); replaces[Anum_pg_collation_collversion - 1] = true; tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, replaces); } else ereport(NOTICE, (errmsg("version has not changed"))); CatalogTupleUpdate(rel, &tup->t_self, tup); InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); ObjectAddressSet(address, CollationRelationId, collOid); heap_freetuple(tup); heap_close(rel, NoLock); return address; } Datum pg_collation_actual_version(PG_FUNCTION_ARGS) { Oid collid = PG_GETARG_OID(0); HeapTuple tp; char *collcollate; char collprovider; char *version; tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); if (!HeapTupleIsValid(tp)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("collation with OID %u does not exist", collid))); collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; ReleaseSysCache(tp); version = get_collation_actual_version(collprovider, collcollate); if (version) PG_RETURN_TEXT_P(cstring_to_text(version)); else PG_RETURN_NULL(); } /* will we use "locale -a" in pg_import_system_collations? 
*/ #if defined(HAVE_LOCALE_T) && !defined(WIN32) #define READ_LOCALE_A_OUTPUT #endif #ifdef READ_LOCALE_A_OUTPUT /* * "Normalize" a libc locale name, stripping off encoding tags such as * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" * -> "br_FR@euro"). Return true if a new, different name was * generated. */ static bool normalize_libc_locale_name(char *new, const char *old) { char *n = new; const char *o = old; bool changed = false; while (*o) { if (*o == '.') { /* skip over encoding tag such as ".utf8" or ".UTF-8" */ o++; while ((*o >= 'A' && *o <= 'Z') || (*o >= 'a' && *o <= 'z') || (*o >= '0' && *o <= '9') || (*o == '-')) o++; changed = true; } else *n++ = *o++; } *n = '\0'; return changed; } /* * qsort comparator for CollAliasData items */ static int cmpaliases(const void *a, const void *b) { const CollAliasData *ca = (const CollAliasData *) a; const CollAliasData *cb = (const CollAliasData *) b; /* comparing localename is enough because other fields are derived */ return strcmp(ca->localename, cb->localename); } #endif /* READ_LOCALE_A_OUTPUT */ #ifdef USE_ICU /* * Get the ICU language tag for a locale name. * The result is a palloc'd string. */ static char * get_icu_language_tag(const char *localename) { char buf[ULOC_FULLNAME_CAPACITY]; UErrorCode status; status = U_ZERO_ERROR; uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not convert locale name \"%s\" to language tag: %s", localename, u_errorName(status)))); return pstrdup(buf); } /* * Get a comment (specifically, the display name) for an ICU locale. * The result is a palloc'd string. 
*/ static char * get_icu_locale_comment(const char *localename) { UErrorCode status; UChar displayname[128]; int32 len_uchar; char *result; status = U_ZERO_ERROR; len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not get display name for locale \"%s\": %s", localename, u_errorName(status)))); icu_from_uchar(&result, displayname, len_uchar); return result; } #endif /* USE_ICU */ /* * pg_import_system_collations: add known system collations to pg_collation */ Datum pg_import_system_collations(PG_FUNCTION_ARGS) { Oid nspid = PG_GETARG_OID(0); int ncreated = 0; /* silence compiler warning if we have no locale implementation at all */ (void) nspid; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to import system collations")))); /* Load collations known to libc, using "locale -a" to enumerate them */ #ifdef READ_LOCALE_A_OUTPUT { FILE *locale_a_handle; char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */ int nvalid = 0; Oid collid; CollAliasData *aliases; int naliases, maxaliases, i; /* expansible array of aliases */ maxaliases = 100; aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); naliases = 0; locale_a_handle = OpenPipeStream("locale -a", "r"); if (locale_a_handle == NULL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not execute command \"%s\": %m", "locale -a"))); while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) { size_t len; int enc; bool skip; char alias[NAMEDATALEN]; len = strlen(localebuf); if (len == 0 || localebuf[len - 1] != '\n') { elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf); continue; } localebuf[len - 1] = '\0'; /* * Some systems have locale names that don't consist entirely of * ASCII letters (such as "bokmål" or "français"). * This is pretty silly, since we need the locale itself to * interpret the non-ASCII characters. 
We can't do much with * those, so we filter them out. */ skip = false; for (i = 0; i < len; i++) { if (IS_HIGHBIT_SET(localebuf[i])) { skip = true; break; } } if (skip) { elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); continue; } enc = pg_get_encoding_from_locale(localebuf, false); if (enc < 0) { /* error message printed by pg_get_encoding_from_locale() */ continue; } if (!PG_VALID_BE_ENCODING(enc)) continue; /* ignore locales for client-only encodings */ if (enc == PG_SQL_ASCII) continue; /* C/POSIX are already in the catalog */ /* count valid locales found in operating system */ nvalid++; /* * Create a collation named the same as the locale, but quietly * doing nothing if it already exists. This is the behavior we * need even at initdb time, because some versions of "locale -a" * can report the same locale name more than once. And it's * convenient for later import runs, too, since you just about * always want to add on new locales without a lot of chatter * about existing ones. */ collid = CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, localebuf, localebuf, get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), true, true); if (OidIsValid(collid)) { ncreated++; /* Must do CCI between inserts to handle duplicates correctly */ CommandCounterIncrement(); } /* * Generate aliases such as "en_US" in addition to "en_US.utf8" * for ease of use. Note that collation names are unique per * encoding only, so this doesn't clash with "en_US" for LATIN1, * say. * * However, it might conflict with a name we'll see later in the * "locale -a" output. So save up the aliases and try to add them * after we've read all the output. 
*/ if (normalize_libc_locale_name(alias, localebuf)) { if (naliases >= maxaliases) { maxaliases *= 2; aliases = (CollAliasData *) repalloc(aliases, maxaliases * sizeof(CollAliasData)); } aliases[naliases].localename = pstrdup(localebuf); aliases[naliases].alias = pstrdup(alias); aliases[naliases].enc = enc; naliases++; } } ClosePipeStream(locale_a_handle); /* * Before processing the aliases, sort them by locale name. The point * here is that if "locale -a" gives us multiple locale names with the * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we * want to pick a deterministic one of them. First in ASCII sort * order is a good enough rule. (Before PG 10, the code corresponding * to this logic in initdb.c had an additional ordering rule, to * prefer the locale name exactly matching the alias, if any. We * don't need to consider that here, because we would have already * created such a pg_collation entry above, and that one will win.) */ if (naliases > 1) qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases); /* Now add aliases, ignoring any that match pre-existing entries */ for (i = 0; i < naliases; i++) { char *locale = aliases[i].localename; char *alias = aliases[i].alias; int enc = aliases[i].enc; collid = CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc, locale, locale, get_collation_actual_version(COLLPROVIDER_LIBC, locale), true, true); if (OidIsValid(collid)) { ncreated++; CommandCounterIncrement(); } } /* Give a warning if "locale -a" seems to be malfunctioning */ if (nvalid == 0) ereport(WARNING, (errmsg("no usable system locales were found"))); } #endif /* READ_LOCALE_A_OUTPUT */ /* Load collations known to ICU */ #ifdef USE_ICU if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) { ereport(NOTICE, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("encoding \"%s\" not supported by ICU", pg_encoding_to_char(GetDatabaseEncoding())))); } else { int i; /* * Start the loop at -1 to sneak in the root locale 
without too much * code duplication. */ for (i = -1; i < ucol_countAvailable(); i++) { const char *name; char *langtag; const char *collcollate; UEnumeration *en; UErrorCode status; const char *val; Oid collid; if (i == -1) name = ""; /* ICU root locale */ else name = ucol_getAvailable(i); langtag = get_icu_language_tag(name); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name; collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); if (OidIsValid(collid)) { ncreated++; CommandCounterIncrement(); CreateComments(collid, CollationRelationId, 0, get_icu_locale_comment(name)); } /* * Add keyword variants */ status = U_ZERO_ERROR; en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not get keyword values for locale \"%s\": %s", name, u_errorName(status)))); status = U_ZERO_ERROR; uenum_reset(en, &status); while ((val = uenum_next(en, NULL, &status))) { char *localeid = psprintf("%s@collation=%s", name, val); langtag = get_icu_language_tag(localeid); collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid; collid = CollationCreate(psprintf("%s-x-icu", langtag), nspid, GetUserId(), COLLPROVIDER_ICU, -1, collcollate, collcollate, get_collation_actual_version(COLLPROVIDER_ICU, collcollate), true, true); if (OidIsValid(collid)) { ncreated++; CommandCounterIncrement(); CreateComments(collid, CollationRelationId, 0, get_icu_locale_comment(localeid)); } } if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not get keyword values for locale \"%s\": %s", name, u_errorName(status)))); uenum_close(en); } } #endif /* USE_ICU */ PG_RETURN_INT32(ncreated); }