From c7db01e325a530ec38ec7ba57cd3ed32e123e33c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 16 Sep 2022 09:37:54 +0200 Subject: [PATCH] Don't allow creation of database with ICU locale with unsupported encoding Check in CREATE DATABASE and initdb that the selected encoding is supported by ICU. Before, they would pass but users would later get an error from the server when they tried to use the database. Also document that initdb sets the encoding to UTF8 by default if the ICU locale provider is chosen. Author: Marina Polyakova Reviewed-by: Kyotaro Horiguchi Discussion: https://www.postgresql.org/message-id/6dd6db0984d86a51b7255ba79f111971@postgrespro.ru --- doc/src/sgml/ref/initdb.sgml | 5 +++-- src/backend/commands/dbcommands.c | 9 +++++++-- src/bin/initdb/initdb.c | 29 +++++++++++++++++++++++++++++ src/bin/initdb/t/001_initdb.pl | 9 +++++++++ src/bin/scripts/t/020_createdb.pl | 9 +++++++++ 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index f01df2dde9..8158896298 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -209,8 +209,9 @@ PostgreSQL documentation Selects the encoding of the template databases. This will also be the default encoding of any database you create later, - unless you override it then. The default is derived from the locale, or - SQL_ASCII if that does not work. The character sets supported by + unless you override it then. The default is derived from the locale, + if the libc locale provider is used, or UTF8 if the + ICU locale provider is used. The character sets supported by the PostgreSQL server are described in <xref linkend="multibyte-charset-supported"/>. 
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 6ff48bb18f..f248ad42b7 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1034,6 +1034,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (dblocprovider == COLLPROVIDER_ICU) { + if (!(is_encoding_supported_by_icu(encoding))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("encoding \"%s\" is not supported with ICU provider", + pg_encoding_to_char(encoding)))); + /* * This would happen if template0 uses the libc provider but the new * database uses icu. @@ -1042,10 +1048,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ICU locale must be specified"))); - } - if (dblocprovider == COLLPROVIDER_ICU) check_icu_locale(dbiculocale); + } /* * Check that the new encoding and locale settings match the source diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 6aeec8d426..28f22b25b2 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2042,6 +2042,27 @@ check_locale_encoding(const char *locale, int user_enc) return true; } +/* + * check if the chosen encoding is supported by ICU + * + * this should match the similar check in the backend createdb() function + */ +static bool +check_icu_locale_encoding(int user_enc) +{ + if (!(is_encoding_supported_by_icu(user_enc))) + { + pg_log_error("encoding mismatch"); + pg_log_error_detail("The encoding you selected (%s) is not supported with the ICU provider.", + pg_encoding_to_char(user_enc)); + pg_log_error_hint("Rerun %s and either do not specify an encoding explicitly, " + "or choose a matching combination.", + progname); + return false; + } + return true; +} + /* * set up the locale variables * @@ -2310,7 +2331,11 @@ setup_locale_encoding(void) } if (!encoding && locale_provider == COLLPROVIDER_ICU) + { encodingid = PG_UTF8; + printf(_("The default 
database encoding has been set to \"%s\".\n"), + pg_encoding_to_char(encodingid)); + } else if (!encoding) { int ctype_enc; @@ -2362,6 +2387,10 @@ setup_locale_encoding(void) if (!check_locale_encoding(lc_ctype, encodingid) || !check_locale_encoding(lc_collate, encodingid)) exit(1); /* check_locale_encoding printed the error */ + + if (locale_provider == COLLPROVIDER_ICU && + !check_icu_locale_encoding(encodingid)) + exit(1); } diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index a37f6dd9b3..164fc11cbf 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -118,6 +118,15 @@ if ($ENV{with_icu} eq 'yes') ], qr/FATAL: could not open collator for locale/, 'fails for invalid ICU locale'); + + command_fails_like( + [ + 'initdb', '--no-sync', + '--locale-provider=icu', '--encoding=SQL_ASCII', + '--icu-locale=en', "$tempdir/dataX" + ], + qr/error: encoding mismatch/, + 'fails for encoding not supported by ICU'); } else { diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index e91c1d013d..e95f200d0b 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -50,6 +50,15 @@ if ($ENV{with_icu} eq 'yes') ], 'fails for invalid ICU locale'); + $node->command_fails_like( + [ + 'createdb', '-T', + 'template0', '--locale-provider=icu', + '--encoding=SQL_ASCII', 'foobarX' + ], + qr/ERROR: encoding "SQL_ASCII" is not supported with ICU provider/, + 'fails for encoding not supported by ICU'); + # additional node, which uses the icu provider my $node2 = PostgreSQL::Test::Cluster->new('icu'); $node2->init(extra => ['--locale-provider=icu', '--icu-locale=en']);