CREATE DATABASE: make LOCALE apply to all collation providers.

For CREATE DATABASE, make the LOCALE parameter apply regardless of the
provider used. This also affects the --locale arguments of initdb and createdb.

Previously, LOCALE (and --locale) only affected the database default
collation when using the libc provider.

Discussion: https://postgr.es/m/1a63084d-221e-4075-619e-6b3e590f673e@enterprisedb.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
Jeff Davis 2023-06-16 10:27:32 -07:00
parent c0d951262c
commit a14e75eb0b
12 changed files with 155 additions and 60 deletions

View File

@ -85,9 +85,16 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
<listitem>
<para>
This is a shortcut for setting <symbol>LC_COLLATE</symbol>
and <symbol>LC_CTYPE</symbol> at once. If you specify this,
you cannot specify either of those parameters.
The locale name for this collation. See <xref
linkend="collation-managing-create-libc"/> and <xref
linkend="collation-managing-create-icu"/> for details.
</para>
<para>
If <replaceable>provider</replaceable> is <literal>libc</literal>, this
is a shortcut for setting <symbol>LC_COLLATE</symbol> and
<symbol>LC_CTYPE</symbol> at once. If you specify
<replaceable>locale</replaceable>, you cannot specify either of those
parameters.
</para>
</listitem>
</varlistentry>
@ -97,8 +104,9 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
<listitem>
<para>
Use the specified operating system locale for
the <symbol>LC_COLLATE</symbol> locale category.
If <replaceable>provider</replaceable> is <literal>libc</literal>, use
the specified operating system locale for the
<symbol>LC_COLLATE</symbol> locale category.
</para>
</listitem>
</varlistentry>
@ -108,8 +116,9 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
<listitem>
<para>
Use the specified operating system locale for
the <symbol>LC_CTYPE</symbol> locale category.
If <replaceable>provider</replaceable> is <literal>libc</literal>, use
the specified operating system locale for the <symbol>LC_CTYPE</symbol>
locale category.
</para>
</listitem>
</varlistentry>

View File

@ -145,8 +145,22 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
<term><replaceable class="parameter">locale</replaceable></term>
<listitem>
<para>
This is a shortcut for setting <symbol>LC_COLLATE</symbol>
and <symbol>LC_CTYPE</symbol> at once.
Sets the default collation order and character classification in the
new database. Collation affects the sort order applied to strings,
e.g., in queries with <literal>ORDER BY</literal>, as well as the order used in indexes
on text columns. Character classification affects the categorization
of characters, e.g., lower, upper, and digit. Also sets the
associated aspects of the operating system environment,
<literal>LC_COLLATE</literal> and <literal>LC_CTYPE</literal>. The
default is the same setting as the template database. See <xref
linkend="collation-managing-create-libc"/> and <xref
linkend="collation-managing-create-icu"/> for details.
</para>
<para>
Can be overridden by setting <xref
linkend="create-database-lc-collate"/>, <xref
linkend="create-database-lc-ctype"/>, or <xref
linkend="create-database-icu-locale"/> individually.
</para>
<tip>
<para>
@ -164,11 +178,17 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
<term><replaceable class="parameter">lc_collate</replaceable></term>
<listitem>
<para>
Collation order (<literal>LC_COLLATE</literal>) to use in the new database.
This affects the sort order applied to strings, e.g., in queries with
ORDER BY, as well as the order used in indexes on text columns.
The default is to use the collation order of the template database.
See below for additional restrictions.
Sets <literal>LC_COLLATE</literal> in the database server's operating
system environment. The default is the setting of <xref
linkend="create-database-locale"/> if specified, otherwise the same
setting as the template database. See below for additional
restrictions.
</para>
<para>
If <xref linkend="create-database-locale-provider"/> is
<literal>libc</literal>, also sets the default collation order to use
in the new database, overriding the setting of <xref
linkend="create-database-locale"/>.
</para>
</listitem>
</varlistentry>
@ -176,10 +196,17 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
<term><replaceable class="parameter">lc_ctype</replaceable></term>
<listitem>
<para>
Character classification (<literal>LC_CTYPE</literal>) to use in the new
database. This affects the categorization of characters, e.g., lower,
upper and digit. The default is to use the character classification of
the template database. See below for additional restrictions.
Sets <literal>LC_CTYPE</literal> in the database server's operating
system environment. The default is the setting of <xref
linkend="create-database-locale"/> if specified, otherwise the same
setting as the template database. See below for additional
restrictions.
</para>
<para>
If <xref linkend="create-database-locale-provider"/> is
<literal>libc</literal>, also sets the default character
classification to use in the new database, overriding the setting of
<xref linkend="create-database-locale"/>.
</para>
</listitem>
</varlistentry>
@ -188,7 +215,13 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
<term><replaceable class="parameter">icu_locale</replaceable></term>
<listitem>
<para>
Specifies the ICU locale ID if the ICU locale provider is used.
Specifies the ICU locale (see <xref
linkend="collation-managing-create-icu"/>) for the database default
collation order and character classification, overriding the setting of
<xref linkend="create-database-locale"/>. The <link
linkend="create-database-locale-provider">locale provider</link> must be ICU. The default
is the setting of <xref linkend="create-database-locale"/> if
specified; otherwise the same setting as the template database.
</para>
</listitem>
</varlistentry>

View File

@ -124,7 +124,10 @@ PostgreSQL documentation
<listitem>
<para>
Specifies the locale to be used in this database. This is equivalent
to specifying both <option>--lc-collate</option> and <option>--lc-ctype</option>.
to setting <option>--lc-collate</option>,
<option>--lc-ctype</option>, and <option>--icu-locale</option> to the
same value. Some locales are only valid for ICU and must be set with
<option>--icu-locale</option>.
</para>
</listitem>
</varlistentry>

View File

@ -116,9 +116,10 @@ PostgreSQL documentation
<para>
To choose a different locale for the cluster, use the option
<option>--locale</option>. There are also individual options
<option>--lc-*</option> (see below) to set values for the individual locale
categories. Note that inconsistent settings for different locale
categories can give nonsensical results, so this should be used with care.
<option>--lc-*</option> and <option>--icu-locale</option> (see below) to
set values for the individual locale categories. Note that inconsistent
settings for different locale categories can give nonsensical results, so
this should be used with care.
</para>
<para>

View File

@ -276,7 +276,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
if (langtag && strcmp(colliculocale, langtag) != 0)
{
ereport(NOTICE,
(errmsg("using standard form \"%s\" for locale \"%s\"",
(errmsg("using standard form \"%s\" for ICU locale \"%s\"",
langtag, colliculocale)));
colliculocale = langtag;

View File

@ -1017,7 +1017,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (dblocprovider == '\0')
dblocprovider = src_locprovider;
if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
dbiculocale = src_iculocale;
{
if (dlocale && dlocale->arg)
dbiculocale = defGetString(dlocale);
else
dbiculocale = src_iculocale;
}
if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
dbicurules = src_icurules;
@ -1031,12 +1036,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (!check_locale(LC_COLLATE, dbcollate, &canonname))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid locale name: \"%s\"", dbcollate)));
errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
dbcollate = canonname;
if (!check_locale(LC_CTYPE, dbctype, &canonname))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid locale name: \"%s\"", dbctype)));
errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
dbctype = canonname;
check_encoding_locale_matches(encoding, dbcollate, dbctype);
@ -1056,7 +1063,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (!dbiculocale)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("ICU locale must be specified")));
errmsg("LOCALE or ICU_LOCALE must be specified")));
/*
* During binary upgrade, or when the locale came from the template
@ -1071,7 +1078,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (langtag && strcmp(dbiculocale, langtag) != 0)
{
ereport(NOTICE,
(errmsg("using standard form \"%s\" for locale \"%s\"",
(errmsg("using standard form \"%s\" for ICU locale \"%s\"",
langtag, dbiculocale)));
dbiculocale = langtag;

View File

@ -2163,7 +2163,11 @@ check_locale_name(int category, const char *locale, char **canonname)
if (res == NULL)
{
if (*locale)
pg_fatal("invalid locale name \"%s\"", locale);
{
pg_log_error("invalid locale name \"%s\"", locale);
pg_log_error_hint("If the locale name is specific to ICU, use --icu-locale.");
exit(1);
}
else
{
/*
@ -2376,7 +2380,7 @@ setlocales(void)
{
char *canonname;
/* set empty lc_* values to locale config if set */
/* set empty lc_* and iculocale values to locale config if set */
if (locale)
{
@ -2392,6 +2396,8 @@ setlocales(void)
lc_monetary = locale;
if (!lc_messages)
lc_messages = locale;
if (!icu_locale && locale_provider == COLLPROVIDER_ICU)
icu_locale = locale;
}
/*

View File

@ -111,6 +111,17 @@ if ($ENV{with_icu} eq 'yes')
],
'option --icu-locale');
command_like(
[
'initdb', '--no-sync', '-A', 'trust',
'--locale-provider=icu', '--locale=und',
'--lc-collate=C', '--lc-ctype=C', '--lc-messages=C',
'--lc-numeric=C', '--lc-monetary=C', '--lc-time=C',
"$tempdir/data4"
],
qr/^\s+ICU locale:\s+und\n/ms,
'options --locale-provider=icu --locale=und --lc-*=C');
command_fails_like(
[
'initdb', '--no-sync',

View File

@ -164,14 +164,6 @@ main(int argc, char *argv[])
exit(1);
}
if (locale)
{
if (!lc_ctype)
lc_ctype = locale;
if (!lc_collate)
lc_collate = locale;
}
if (encoding)
{
if (pg_char_to_encoding(encoding) < 0)
@ -219,6 +211,11 @@ main(int argc, char *argv[])
appendPQExpBuffer(&sql, " STRATEGY %s", fmtId(strategy));
if (template)
appendPQExpBuffer(&sql, " TEMPLATE %s", fmtId(template));
if (locale)
{
appendPQExpBufferStr(&sql, " LOCALE ");
appendStringLiteralConn(&sql, locale, conn);
}
if (lc_collate)
{
appendPQExpBufferStr(&sql, " LC_COLLATE ");

View File

@ -86,6 +86,15 @@ if ($ENV{with_icu} eq 'yes')
],
'create database with icu locale from template database with icu provider'
);
$node2->command_ok(
[
'createdb', '-T', 'template0', '--locale-provider', 'icu',
'--locale', 'en', '--lc-collate', 'C', '--lc-ctype', 'C',
'foobar57'
],
'create database with locale as ICU locale'
);
}
else
{
@ -110,7 +119,7 @@ ALTER TABLE tab_foobar owner to role_foobar;
CREATE POLICY pol_foobar ON tab_foobar FOR ALL TO role_foobar;');
$node->issues_sql_like(
[ 'createdb', '-l', 'C', '-T', 'foobar2', 'foobar3' ],
qr/statement: CREATE DATABASE foobar3 TEMPLATE foobar2/,
qr/statement: CREATE DATABASE foobar3 TEMPLATE foobar2 LOCALE 'C'/,
'create database with template');
($ret, $stdout, $stderr) = $node->psql(
'foobar3',
@ -137,7 +146,7 @@ $node->command_checks_all(
1,
[qr/^$/],
[
qr/^createdb: error: database creation failed: ERROR: invalid locale name|^createdb: error: database creation failed: ERROR: new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s
qr/^createdb: error: database creation failed: ERROR: invalid LC_COLLATE locale name|^createdb: error: database creation failed: ERROR: new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s
],
'createdb with incorrect --lc-collate');
$node->command_checks_all(
@ -145,7 +154,7 @@ $node->command_checks_all(
1,
[qr/^$/],
[
qr/^createdb: error: database creation failed: ERROR: invalid locale name|^createdb: error: database creation failed: ERROR: new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s
qr/^createdb: error: database creation failed: ERROR: invalid LC_CTYPE locale name|^createdb: error: database creation failed: ERROR: new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s
],
'createdb with incorrect --lc-ctype');

View File

@ -51,17 +51,36 @@ b),
'sort by explicit collation upper first');
# Test error cases in CREATE DATABASE involving locale-related options
# Test that LOCALE='C' works for ICU
is( $node1->psql(
'postgres',
q{CREATE DATABASE dbicu1 LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8}
),
0,
"C locale works for ICU");
my ($ret, $stdout, $stderr) = $node1->psql('postgres',
q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8}
);
# Test that LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE
# are specified
is( $node1->psql(
'postgres',
q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'
LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8}
),
0,
"LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified");
# Test that ICU-specific LOCALE without LC_COLLATE and LC_CTYPE must
# be specified with ICU_LOCALE
my ($ret, $stdout, $stderr) = $node1->psql(
'postgres',
q{CREATE DATABASE dbicu3 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'
TEMPLATE template0 ENCODING UTF8});
isnt($ret, 0,
"ICU locale must be specified for ICU provider: exit code not 0");
"ICU-specific locale must be specified with ICU_LOCALE: exit code not 0");
like(
$stderr,
qr/ERROR: ICU locale must be specified/,
"ICU locale must be specified for ICU provider: error message");
qr/ERROR: invalid LC_COLLATE locale name/,
"ICU-specific locale must be specified with ICU_LOCALE: error message");
done_testing();

View File

@ -1194,9 +1194,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll
(1 row)
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
NOTICE: using standard form "und-u-kf-lower" for locale "@colCaseFirst=lower"
NOTICE: using standard form "und-u-kf-lower" for ICU locale "@colCaseFirst=lower"
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
NOTICE: using standard form "und-u-kf-upper" for locale "@colCaseFirst=upper"
NOTICE: using standard form "und-u-kf-upper" for ICU locale "@colCaseFirst=upper"
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
?column? | ?column?
----------+----------
@ -1204,7 +1204,7 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
(1 row)
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
NOTICE: using standard form "und-u-ka-shifted" for locale "@colAlternate=shifted"
NOTICE: using standard form "und-u-ka-shifted" for ICU locale "@colAlternate=shifted"
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
?column? | ?column?
----------+----------
@ -1221,12 +1221,12 @@ SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_n
(1 row)
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
NOTICE: using standard form "und-u-kn-lower" for locale "@colNumeric=lower"
NOTICE: using standard form "und-u-kn-lower" for ICU locale "@colNumeric=lower"
ERROR: could not open collator for locale "und-u-kn-lower": U_ILLEGAL_ARGUMENT_ERROR
-- test that attributes not handled by icu_set_collation_attributes()
-- (handled by ucol_open() directly) also work
CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook');
NOTICE: using standard form "de-u-co-phonebk" for locale "de@collation=phonebook"
NOTICE: using standard form "de-u-co-phonebk" for ICU locale "de@collation=phonebook"
SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
?column? | ?column?
----------+----------
@ -1235,7 +1235,7 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
-- rules
CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
NOTICE: using standard form "und" for locale ""
NOTICE: using standard form "und" for ICU locale ""
CREATE TABLE test7 (a text);
-- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
@ -1263,13 +1263,13 @@ SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
DROP TABLE test7;
CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
NOTICE: using standard form "und" for locale ""
NOTICE: using standard form "und" for ICU locale ""
ERROR: could not open collator for locale "und" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
-- nondeterministic collations
CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
NOTICE: using standard form "und" for locale ""
NOTICE: using standard form "und" for ICU locale ""
CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
NOTICE: using standard form "und" for locale ""
NOTICE: using standard form "und" for ICU locale ""
CREATE TABLE test6 (a int, b text);
-- same string in different normal forms
INSERT INTO test6 VALUES (1, U&'\00E4bc');
@ -1319,9 +1319,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
(2 rows)
CREATE COLLATION case_sensitive (provider = icu, locale = '');
NOTICE: using standard form "und" for locale ""
NOTICE: using standard form "und" for ICU locale ""
CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
NOTICE: using standard form "und-u-ks-level2" for locale "@colStrength=secondary"
NOTICE: using standard form "und-u-ks-level2" for ICU locale "@colStrength=secondary"
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
?column? | ?column?
----------+----------