From 9637badd9f9209166140eb567602e91699dd2ffb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Thu, 9 Mar 2023 08:28:05 -0800 Subject: [PATCH] pg_upgrade: copy locale and encoding information to new cluster. Previously, pg_upgrade checked that the old and new clusters were compatible, including the locale and encoding. But the new cluster was just created, and only template0 from the new cluster will be preserved (template1 and postgres are both recreated during the upgrade process). Because template0 is not sensitive to locale or encoding, just update the pg_database entry to be the same as template0 from the original cluster. This commit makes it easier to change the default initdb locale or encoding settings without causing needless incompatibilities. Discussion: https://postgr.es/m/d62b2874-729b-d26a-2d0a-0d64f509eca4@enterprisedb.com Reviewed-by: Peter Eisentraut --- src/bin/pg_upgrade/Makefile | 2 + src/bin/pg_upgrade/check.c | 160 ------------------------- src/bin/pg_upgrade/info.c | 69 ++++++++--- src/bin/pg_upgrade/meson.build | 1 + src/bin/pg_upgrade/pg_upgrade.c | 62 ++++++++++ src/bin/pg_upgrade/pg_upgrade.h | 12 +- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 69 ++++++++++- 7 files changed, 192 insertions(+), 183 deletions(-) diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index 7f8042f34a..5834513add 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -51,6 +51,8 @@ clean distclean maintainer-clean: rm -rf delete_old_cluster.sh log/ tmp_check/ \ reindex_hash.sql +export with_icu + check: $(prove_check) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 7cf68dc9af..b71b00be37 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -16,9 +16,6 @@ #include "pg_upgrade.h" static void check_new_cluster_is_empty(void); -static void check_databases_are_compatible(void); -static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb); -static bool equivalent_locale(int category, const char *loca, const char *locb); static void check_is_install_user(ClusterInfo *cluster); static void check_proper_datallowconn(ClusterInfo *cluster); static void check_for_prepared_transactions(ClusterInfo *cluster); @@ -33,7 +30,6 @@ static void check_for_jsonb_9_4_usage(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(ClusterInfo *new_cluster); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); -static char *get_canonical_locale_name(int category, const char *locale); /* @@ -194,7 +190,6 @@ check_new_cluster(void) get_db_and_rel_infos(&new_cluster); check_new_cluster_is_empty(); - check_databases_are_compatible(); check_loadable_libraries(); @@ -349,94 +344,6 @@ check_cluster_compatibility(bool live_check) } -/* - * check_locale_and_encoding() - * - * Check that locale and encoding of a database in the old and new clusters - * are compatible. - */ -static void -check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb) -{ - if (olddb->db_encoding != newdb->db_encoding) - pg_fatal("encodings for database \"%s\" do not match: old \"%s\", new \"%s\"", - olddb->db_name, - pg_encoding_to_char(olddb->db_encoding), - pg_encoding_to_char(newdb->db_encoding)); - if (!equivalent_locale(LC_COLLATE, olddb->db_collate, newdb->db_collate)) - pg_fatal("lc_collate values for database \"%s\" do not match: old \"%s\", new \"%s\"", - olddb->db_name, olddb->db_collate, newdb->db_collate); - if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype)) - pg_fatal("lc_ctype values for database \"%s\" do not match: old \"%s\", new \"%s\"", - olddb->db_name, olddb->db_ctype, newdb->db_ctype); - if (olddb->db_collprovider != newdb->db_collprovider) - pg_fatal("locale providers for database \"%s\" do not match: old \"%s\", new \"%s\"", - olddb->db_name, - collprovider_name(olddb->db_collprovider), - collprovider_name(newdb->db_collprovider)); - if ((olddb->db_iculocale == NULL && newdb->db_iculocale != NULL) || - (olddb->db_iculocale != NULL && newdb->db_iculocale == NULL) || - (olddb->db_iculocale != NULL && newdb->db_iculocale != NULL && strcmp(olddb->db_iculocale, newdb->db_iculocale) != 0)) - pg_fatal("ICU locale values for database \"%s\" do not match: old \"%s\", new \"%s\"", - olddb->db_name, - olddb->db_iculocale ? olddb->db_iculocale : "(null)", - newdb->db_iculocale ? newdb->db_iculocale : "(null)"); -} - -/* - * equivalent_locale() - * - * Best effort locale-name comparison. Return false if we are not 100% sure - * the locales are equivalent. - * - * Note: The encoding parts of the names are ignored. This function is - * currently used to compare locale names stored in pg_database, and - * pg_database contains a separate encoding field. That's compared directly - * in check_locale_and_encoding(). - */ -static bool -equivalent_locale(int category, const char *loca, const char *locb) -{ - const char *chara; - const char *charb; - char *canona; - char *canonb; - int lena; - int lenb; - - /* - * If the names are equal, the locales are equivalent. Checking this first - * avoids calling setlocale() in the common case that the names are equal. - * That's a good thing, if setlocale() is buggy, for example. - */ - if (pg_strcasecmp(loca, locb) == 0) - return true; - - /* - * Not identical. Canonicalize both names, remove the encoding parts, and - * try again. - */ - canona = get_canonical_locale_name(category, loca); - chara = strrchr(canona, '.'); - lena = chara ? (chara - canona) : strlen(canona); - - canonb = get_canonical_locale_name(category, locb); - charb = strrchr(canonb, '.'); - lenb = charb ? (charb - canonb) : strlen(canonb); - - if (lena == lenb && pg_strncasecmp(canona, canonb, lena) == 0) - { - pg_free(canona); - pg_free(canonb); - return true; - } - - pg_free(canona); - pg_free(canonb); - return false; -} - - static void check_new_cluster_is_empty(void) { @@ -460,35 +367,6 @@ check_new_cluster_is_empty(void) } } -/* - * Check that every database that already exists in the new cluster is - * compatible with the corresponding database in the old one. - */ -static void -check_databases_are_compatible(void) -{ - int newdbnum; - int olddbnum; - DbInfo *newdbinfo; - DbInfo *olddbinfo; - - for (newdbnum = 0; newdbnum < new_cluster.dbarr.ndbs; newdbnum++) - { - newdbinfo = &new_cluster.dbarr.dbs[newdbnum]; - - /* Find the corresponding database in the old cluster */ - for (olddbnum = 0; olddbnum < old_cluster.dbarr.ndbs; olddbnum++) - { - olddbinfo = &old_cluster.dbarr.dbs[olddbnum]; - if (strcmp(newdbinfo->db_name, olddbinfo->db_name) == 0) - { - check_locale_and_encoding(olddbinfo, newdbinfo); - break; - } - } - } -} - /* * A previous run of pg_upgrade might have failed and the new cluster * directory recreated, but they might have forgotten to remove @@ -1524,41 +1402,3 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster) else check_ok(); } - - -/* - * get_canonical_locale_name - * - * Send the locale name to the system, and hope we get back a canonical - * version. This should match the backend's check_locale() function. - */ -static char * -get_canonical_locale_name(int category, const char *locale) -{ - char *save; - char *res; - - /* get the current setting, so we can restore it. */ - save = setlocale(category, NULL); - if (!save) - pg_fatal("failed to get the current locale"); - - /* 'save' may be pointing at a modifiable scratch variable, so copy it. */ - save = pg_strdup(save); - - /* set the locale with setlocale, to see if it accepts it. */ - res = setlocale(category, locale); - - if (!res) - pg_fatal("failed to get system locale name for \"%s\"", locale); - - res = pg_strdup(res); - - /* restore old value. */ - if (!setlocale(category, save)) - pg_fatal("failed to restore old locale \"%s\"", save); - - pg_free(save); - - return res; -} diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index c1399c09b9..33b10aac3c 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -20,6 +20,7 @@ static void create_rel_filename_map(const char *old_data, const char *new_data, static void report_unmatched_relation(const RelInfo *rel, const DbInfo *db, bool is_new_db); static void free_db_and_rel_infos(DbInfoArr *db_arr); +static void get_template0_info(ClusterInfo *cluster); static void get_db_infos(ClusterInfo *cluster); static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo); static void free_rel_infos(RelInfoArr *rel_arr); @@ -278,6 +279,7 @@ get_db_and_rel_infos(ClusterInfo *cluster) if (cluster->dbarr.dbs != NULL) free_db_and_rel_infos(&cluster->dbarr); + get_template0_info(cluster); get_db_infos(cluster); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) @@ -293,6 +295,55 @@ get_db_and_rel_infos(ClusterInfo *cluster) } +/* + * Get information about template0, which will be copied from the old cluster + * to the new cluster. + */ +static void +get_template0_info(ClusterInfo *cluster) +{ + PGconn *conn = connectToServer(cluster, "template1"); + DbLocaleInfo *locale; + PGresult *dbres; + int i_datencoding; + int i_datlocprovider; + int i_datcollate; + int i_datctype; + int i_daticulocale; + + dbres = executeQueryOrDie(conn, + "SELECT encoding, datlocprovider, " + " datcollate, datctype, daticulocale " + "FROM pg_catalog.pg_database " + "WHERE datname='template0'"); + + if (PQntuples(dbres) != 1) + pg_fatal("template0 not found"); + + locale = pg_malloc(sizeof(DbLocaleInfo)); + + i_datencoding = PQfnumber(dbres, "encoding"); + i_datlocprovider = PQfnumber(dbres, "datlocprovider"); + i_datcollate = PQfnumber(dbres, "datcollate"); + i_datctype = PQfnumber(dbres, "datctype"); + i_daticulocale = PQfnumber(dbres, "daticulocale"); + + locale->db_encoding = atoi(PQgetvalue(dbres, 0, i_datencoding)); + locale->db_collprovider = PQgetvalue(dbres, 0, i_datlocprovider)[0]; + locale->db_collate = pg_strdup(PQgetvalue(dbres, 0, i_datcollate)); + locale->db_ctype = pg_strdup(PQgetvalue(dbres, 0, i_datctype)); + if (PQgetisnull(dbres, 0, i_daticulocale)) + locale->db_iculocale = NULL; + else + locale->db_iculocale = pg_strdup(PQgetvalue(dbres, 0, i_daticulocale)); + + cluster->template0 = locale; + + PQclear(dbres); + PQfinish(conn); +} + + /* * get_db_infos() * @@ -309,11 +360,6 @@ get_db_infos(ClusterInfo *cluster) DbInfo *dbinfos; int i_datname, i_oid, - i_encoding, - i_datcollate, - i_datctype, - i_datlocprovider, - i_daticulocale, i_spclocation; char query[QUERY_ALLOC]; @@ -337,11 +383,6 @@ get_db_infos(ClusterInfo *cluster) i_oid = PQfnumber(res, "oid"); i_datname = PQfnumber(res, "datname"); - i_encoding = PQfnumber(res, "encoding"); - i_datcollate = PQfnumber(res, "datcollate"); - i_datctype = PQfnumber(res, "datctype"); - i_datlocprovider = PQfnumber(res, "datlocprovider"); - i_daticulocale = PQfnumber(res, "daticulocale"); i_spclocation = PQfnumber(res, "spclocation"); ntups = PQntuples(res); @@ -351,14 +392,6 @@ get_db_infos(ClusterInfo *cluster) { dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid)); dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname)); - dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding)); - dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate)); - dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype)); - dbinfos[tupnum].db_collprovider = PQgetvalue(res, tupnum, i_datlocprovider)[0]; - if (PQgetisnull(res, tupnum, i_daticulocale)) - dbinfos[tupnum].db_iculocale = NULL; - else - dbinfos[tupnum].db_iculocale = pg_strdup(PQgetvalue(res, tupnum, i_daticulocale)); snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s", PQgetvalue(res, tupnum, i_spclocation)); } diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build index 18c27b4e72..12a97f84e2 100644 --- a/src/bin/pg_upgrade/meson.build +++ b/src/bin/pg_upgrade/meson.build @@ -38,6 +38,7 @@ tests += { 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), 'tap': { + 'env': {'with_icu': icu.found() ? 'yes' : 'no'}, 'tests': [ 't/001_basic.pl', 't/002_pg_upgrade.pl', diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index e5597d3105..8c6009151f 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -51,6 +51,7 @@ #include "fe_utils/string_utils.h" #include "pg_upgrade.h" +static void set_locale_and_encoding(void); static void prepare_new_cluster(void); static void prepare_new_globals(void); static void create_new_objects(void); @@ -139,6 +140,8 @@ main(int argc, char **argv) "Performing Upgrade\n" "------------------"); + set_locale_and_encoding(); + prepare_new_cluster(); stop_postmaster(false); @@ -366,6 +369,65 @@ setup(char *argv0, bool *live_check) } +/* + * Copy locale and encoding information into the new cluster's template0. + * + * We need to copy the encoding, datlocprovider, datcollate, datctype, and + * daticulocale. We don't need datcollversion because that's never set for + * template0. + */ +static void +set_locale_and_encoding(void) +{ + PGconn *conn_new_template1; + char *datcollate_literal; + char *datctype_literal; + char *daticulocale_literal = NULL; + DbLocaleInfo *locale = old_cluster.template0; + + prep_status("Setting locale and encoding for new cluster"); + + /* escape literals with respect to new cluster */ + conn_new_template1 = connectToServer(&new_cluster, "template1"); + + datcollate_literal = PQescapeLiteral(conn_new_template1, + locale->db_collate, + strlen(locale->db_collate)); + datctype_literal = PQescapeLiteral(conn_new_template1, + locale->db_ctype, + strlen(locale->db_ctype)); + if (locale->db_iculocale) + daticulocale_literal = PQescapeLiteral(conn_new_template1, + locale->db_iculocale, + strlen(locale->db_iculocale)); + else + daticulocale_literal = pg_strdup("NULL"); + + /* update template0 in new cluster */ + PQclear(executeQueryOrDie(conn_new_template1, + "UPDATE pg_catalog.pg_database " + " SET encoding = %u, " + " datlocprovider = '%c', " + " datcollate = %s, " + " datctype = %s, " + " daticulocale = %s " + " WHERE datname = 'template0' ", + locale->db_encoding, + locale->db_collprovider, + datcollate_literal, + datctype_literal, + daticulocale_literal)); + + PQfreemem(datcollate_literal); + PQfreemem(datctype_literal); + PQfreemem(daticulocale_literal); + + PQfinish(conn_new_template1); + + check_ok(); +} + + static void prepare_new_cluster(void) { diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 5f2a116f23..3eea0139c7 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -175,13 +175,20 @@ typedef struct char *db_name; /* database name */ char db_tablespace[MAXPGPATH]; /* database default tablespace * path */ + RelInfoArr rel_arr; /* array of all user relinfos */ +} DbInfo; + +/* + * Locale information about a database. + */ +typedef struct +{ char *db_collate; char *db_ctype; char db_collprovider; char *db_iculocale; int db_encoding; - RelInfoArr rel_arr; /* array of all user relinfos */ -} DbInfo; +} DbLocaleInfo; typedef struct { @@ -252,6 +259,7 @@ typedef enum typedef struct { ControlData controldata; /* pg_control information */ + DbLocaleInfo *template0; /* template0 locale info */ DbInfoArr dbarr; /* dbinfos array */ char *pgdata; /* pathname for cluster's $PGDATA directory */ char *pgconfig; /* pathname for cluster's config file diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 62a8fa9d8b..a6a0162d5a 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -90,15 +90,58 @@ my $oldnode = PostgreSQL::Test::Cluster->new('old_node', install_path => $ENV{oldinstall}); +my %node_params = (); + # To increase coverage of non-standard segment size and group access without # increasing test runtime, run these tests with a custom setting. # --allow-group-access and --wal-segsize have been added in v11. -my %node_params = (); -$node_params{extra} = [ '--wal-segsize', '1', '--allow-group-access' ] - if $oldnode->pg_version >= 11; +my @custom_opts = (); +if ($oldnode->pg_version >= 11) +{ + push @custom_opts, ('--wal-segsize', '1'); + push @custom_opts, '--allow-group-access'; +} + +# Set up the locale settings for the original cluster, so that we +# can test that pg_upgrade copies the locale settings of template0 +# from the old to the new cluster. + +my $original_encoding = "6"; # UTF-8 +my $original_provider = "c"; +my $original_collate = "C"; +my $original_iculocale = ""; +if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') +{ + $original_provider = "i"; + $original_iculocale = "fr-CA"; +} + +my @initdb_params = @custom_opts; + +push @initdb_params, ('--encoding', 'UTF-8'); +push @initdb_params, ('--lc-collate', $original_collate); +if ($original_provider eq "i") +{ + push @initdb_params, ('--locale-provider', 'icu'); + push @initdb_params, ('--icu-locale', 'fr-CA'); +} + +$node_params{extra} = \@initdb_params; $oldnode->init(%node_params); $oldnode->start; +my $result; +$result = $oldnode->safe_psql( + 'postgres', q{SELECT encoding, datlocprovider, datcollate, daticulocale + FROM pg_database WHERE datname='template0'}); +is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale", + "check locales in original cluster" + ); + +# check ctype, which was acquired from environment by initdb +my $original_ctype = $oldnode->safe_psql( + 'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'}); + # The default location of the source code is the root of this directory. my $srcdir = abs_path("../../.."); @@ -168,6 +211,18 @@ else # Initialize a new node for the upgrade. my $newnode = PostgreSQL::Test::Cluster->new('new_node'); + +# Reset to original parameters. +@initdb_params = @custom_opts; + +# The new cluster will be initialized with different locale settings, +# but these settings will be overwritten with those of the original +# cluster. +push @initdb_params, ('--encoding', 'SQL_ASCII'); +push @initdb_params, ('--locale-provider', 'libc'); +push @initdb_params, ('--lc-ctype', 'C'); + +$node_params{extra} = \@initdb_params; $newnode->init(%node_params); my $newbindir = $newnode->config_data('--bindir'); @@ -338,6 +393,14 @@ if (-d $log_path) } } +# Test that upgraded cluster has original locale settings. +$result = $newnode->safe_psql( + 'postgres', q{SELECT encoding, datlocprovider, datcollate, datctype, daticulocale + FROM pg_database WHERE datname='template0'}); +is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale", + "check that locales in new cluster match original cluster" + ); + # Second dump from the upgraded instance. @dump_command = ( 'pg_dumpall', '--no-sync', '-d', $newnode->connstr('postgres'),