initdb: derive encoding from locale for ICU, similar to libc.

Previously, the default encoding was derived from the locale when
using libc, while the default was always UTF-8 when using ICU. That
would throw an error when the locale was not compatible with UTF-8.

This commit causes initdb to derive the default encoding from the
locale for both providers. If --no-locale is specified (or if the
locale is C or POSIX), the default encoding will be UTF-8 for ICU
(because ICU does not support SQL_ASCII) and SQL_ASCII for libc.

Per buildfarm failure on system "hoverfly" related to commit
27b62377b4.

Discussion: https://postgr.es/m/d191d5841347301a8f1238f609471ddd957fc47e.camel%40j-davis.com
This commit is contained in:
Jeff Davis 2023-03-10 10:51:24 -08:00
parent 3e623ebc7a
commit c45dc7ffbb
6 changed files with 29 additions and 27 deletions

View File

@ -37,6 +37,6 @@ tests += {
'sql': [
'unaccent',
],
'regress_args': ['--encoding=UTF8'],
'regress_args': ['--encoding=UTF8', '--no-locale'],
},
}

View File

@ -213,13 +213,19 @@ PostgreSQL documentation
<term><option>--encoding=<replaceable class="parameter">encoding</replaceable></option></term>
<listitem>
<para>
Selects the encoding of the template databases. This will also
be the default encoding of any database you create later,
unless you override it then. The default is derived from the locale,
if the libc locale provider is used, or <literal>UTF8</literal> if the
ICU locale provider is used. The character sets supported by
the <productname>PostgreSQL</productname> server are described
in <xref linkend="multibyte-charset-supported"/>.
Selects the encoding of the template databases. This will also be the
default encoding of any database you create later, unless you override
it then. The character sets supported by the
<productname>PostgreSQL</productname> server are described in <xref
linkend="multibyte-charset-supported"/>.
</para>
<para>
By default, the template database encoding is derived from the
locale. If <xref linkend="app-initdb-option-no-locale"/> is specified
(or equivalently, if the locale is <literal>C</literal> or
<literal>POSIX</literal>), then the default is <literal>UTF8</literal>
for the ICU provider and <literal>SQL_ASCII</literal> for the
<literal>libc</literal> provider.
</para>
</listitem>
</varlistentry>

View File

@ -2350,18 +2350,19 @@ setup_locale_encoding(void)
lc_time);
}
if (!encoding && locale_provider == COLLPROVIDER_ICU)
{
encodingid = PG_UTF8;
printf(_("The default database encoding has been set to \"%s\".\n"),
pg_encoding_to_char(encodingid));
}
else if (!encoding)
if (!encoding)
{
int ctype_enc;
ctype_enc = pg_get_encoding_from_locale(lc_ctype, true);
/*
* If ctype_enc=SQL_ASCII, it's compatible with any encoding. ICU does
* not support SQL_ASCII, so select UTF-8 instead.
*/
if (locale_provider == COLLPROVIDER_ICU && ctype_enc == PG_SQL_ASCII)
ctype_enc = PG_UTF8;
if (ctype_enc == -1)
{
/* Couldn't recognize the locale's codeset */

View File

@ -108,7 +108,7 @@ if ($oldnode->pg_version >= 11)
my $original_encoding = "6"; # UTF-8
my $original_provider = "c";
my $original_collate = "C";
my $original_locale = "C";
my $original_iculocale = "";
my $provider_field = "'c' AS datlocprovider";
my $iculocale_field = "NULL AS daticulocale";
@ -123,7 +123,7 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
my @initdb_params = @custom_opts;
push @initdb_params, ('--encoding', 'UTF-8');
push @initdb_params, ('--lc-collate', $original_collate);
push @initdb_params, ('--locale', $original_locale);
if ($original_provider eq "i")
{
push @initdb_params, ('--locale-provider', 'icu');
@ -136,16 +136,12 @@ $oldnode->start;
my $result;
$result = $oldnode->safe_psql(
'postgres', "SELECT encoding, $provider_field, datcollate, $iculocale_field
'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
FROM pg_database WHERE datname='template0'");
is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale",
is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
"check locales in original cluster"
);
# check ctype, which was acquired from environment by initdb
my $original_ctype = $oldnode->safe_psql(
'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'});
# The default location of the source code is the root of this directory.
my $srcdir = abs_path("../../..");
@ -224,7 +220,6 @@ my $newnode = PostgreSQL::Test::Cluster->new('new_node');
# cluster.
push @initdb_params, ('--encoding', 'SQL_ASCII');
push @initdb_params, ('--locale-provider', 'libc');
push @initdb_params, ('--lc-ctype', 'C');
$node_params{extra} = \@initdb_params;
$newnode->init(%node_params);
@ -401,7 +396,7 @@ if (-d $log_path)
$result = $newnode->safe_psql(
'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
FROM pg_database WHERE datname='template0'");
is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale",
is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
"check that locales in new cluster match original cluster"
);

View File

@ -41,7 +41,7 @@ if ($ENV{with_icu} eq 'yes')
[
'createdb', '-T',
'template0', '-E', 'UTF8', '--locale-provider=icu',
'--icu-locale=en', 'foobar5'
'--locale=C', '--icu-locale=en', 'foobar5'
],
qr/statement: CREATE DATABASE foobar5 .* LOCALE_PROVIDER icu ICU_LOCALE 'en'/,
'create database with ICU locale specified');

View File

@ -54,7 +54,7 @@ b),
# Test error cases in CREATE DATABASE involving locale-related options
my ($ret, $stdout, $stderr) = $node1->psql('postgres',
q{CREATE DATABASE dbicu LOCALE_PROVIDER icu TEMPLATE template0 ENCODING UTF8});
q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8});
isnt($ret, 0,
"ICU locale must be specified for ICU provider: exit code not 0");
like(