pg_upgrade: copy locale and encoding information to new cluster.

Previously, pg_upgrade checked that the old and new clusters were
compatible, including the locale and encoding. But the new cluster was
just created, and only template0 from the new cluster will be
preserved (template1 and postgres are both recreated during the
upgrade process).

Because template0 is not sensitive to locale or encoding, just update
the pg_database entry to be the same as template0 from the original
cluster.

This commit makes it easier to change the default initdb locale or
encoding settings without causing needless incompatibilities.

Discussion: https://postgr.es/m/d62b2874-729b-d26a-2d0a-0d64f509eca4@enterprisedb.com
Reviewed-by: Peter Eisentraut
This commit is contained in:
Jeff Davis 2023-03-09 08:28:05 -08:00
parent 8dff2f224f
commit 9637badd9f
7 changed files with 192 additions and 183 deletions

View File

@ -51,6 +51,8 @@ clean distclean maintainer-clean:
rm -rf delete_old_cluster.sh log/ tmp_check/ \
reindex_hash.sql
export with_icu
check:
$(prove_check)

View File

@ -16,9 +16,6 @@
#include "pg_upgrade.h"
static void check_new_cluster_is_empty(void);
static void check_databases_are_compatible(void);
static void check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb);
static bool equivalent_locale(int category, const char *loca, const char *locb);
static void check_is_install_user(ClusterInfo *cluster);
static void check_proper_datallowconn(ClusterInfo *cluster);
static void check_for_prepared_transactions(ClusterInfo *cluster);
@ -33,7 +30,6 @@ static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static char *get_canonical_locale_name(int category, const char *locale);
/*
@ -194,7 +190,6 @@ check_new_cluster(void)
get_db_and_rel_infos(&new_cluster);
check_new_cluster_is_empty();
check_databases_are_compatible();
check_loadable_libraries();
@ -349,94 +344,6 @@ check_cluster_compatibility(bool live_check)
}
/*
 * check_locale_and_encoding()
 *
 * Compare the encoding, collation, ctype, locale provider and ICU locale
 * recorded for the same database in the old and the new cluster.  The checks
 * run in that order and the first mismatch is reported through pg_fatal().
 */
static void
check_locale_and_encoding(DbInfo *olddb, DbInfo *newdb)
{
	const char *old_icu = olddb->db_iculocale;
	const char *new_icu = newdb->db_iculocale;
	bool		icu_matches;

	if (olddb->db_encoding != newdb->db_encoding)
	{
		pg_fatal("encodings for database \"%s\" do not match: old \"%s\", new \"%s\"",
				 olddb->db_name,
				 pg_encoding_to_char(olddb->db_encoding),
				 pg_encoding_to_char(newdb->db_encoding));
	}

	if (!equivalent_locale(LC_COLLATE, olddb->db_collate, newdb->db_collate))
	{
		pg_fatal("lc_collate values for database \"%s\" do not match: old \"%s\", new \"%s\"",
				 olddb->db_name, olddb->db_collate, newdb->db_collate);
	}

	if (!equivalent_locale(LC_CTYPE, olddb->db_ctype, newdb->db_ctype))
	{
		pg_fatal("lc_ctype values for database \"%s\" do not match: old \"%s\", new \"%s\"",
				 olddb->db_name, olddb->db_ctype, newdb->db_ctype);
	}

	if (olddb->db_collprovider != newdb->db_collprovider)
	{
		pg_fatal("locale providers for database \"%s\" do not match: old \"%s\", new \"%s\"",
				 olddb->db_name,
				 collprovider_name(olddb->db_collprovider),
				 collprovider_name(newdb->db_collprovider));
	}

	/*
	 * ICU locales match when both are unset, or when both are set to the
	 * same string (exact, case-sensitive comparison).
	 */
	if (old_icu == NULL || new_icu == NULL)
		icu_matches = (old_icu == new_icu);
	else
		icu_matches = (strcmp(old_icu, new_icu) == 0);

	if (!icu_matches)
	{
		pg_fatal("ICU locale values for database \"%s\" do not match: old \"%s\", new \"%s\"",
				 olddb->db_name,
				 old_icu ? old_icu : "(null)",
				 new_icu ? new_icu : "(null)");
	}
}
/*
 * equivalent_locale()
 *
 * Best-effort comparison of two locale names for the given category.
 * Returns true only when we are confident the names denote the same locale;
 * any doubt yields false.
 *
 * Note: anything after the last '.' in a canonical name (the encoding part)
 * is ignored.  This function is used on locale names stored in pg_database,
 * which carries the encoding in a separate field; that field is compared
 * directly in check_locale_and_encoding().
 */
static bool
equivalent_locale(int category, const char *loca, const char *locb)
{
	char	   *canon_a;
	char	   *canon_b;
	const char *dot_a;
	const char *dot_b;
	int			len_a;
	int			len_b;
	bool		same;

	/*
	 * Fast path: names that are equal ignoring case are equivalent.  Doing
	 * this first keeps setlocale() out of the common case, which is a good
	 * thing if setlocale() happens to be buggy.
	 */
	if (pg_strcasecmp(loca, locb) == 0)
		return true;

	/*
	 * Otherwise canonicalize each name, measure it up to (but excluding) the
	 * encoding suffix, and compare those prefixes ignoring case.
	 */
	canon_a = get_canonical_locale_name(category, loca);
	dot_a = strrchr(canon_a, '.');
	len_a = dot_a ? (dot_a - canon_a) : strlen(canon_a);

	canon_b = get_canonical_locale_name(category, locb);
	dot_b = strrchr(canon_b, '.');
	len_b = dot_b ? (dot_b - canon_b) : strlen(canon_b);

	same = (len_a == len_b &&
			pg_strncasecmp(canon_a, canon_b, len_a) == 0);

	pg_free(canon_a);
	pg_free(canon_b);

	return same;
}
static void
check_new_cluster_is_empty(void)
{
@ -460,35 +367,6 @@ check_new_cluster_is_empty(void)
}
}
/*
 * For each database already present in the new cluster, look up the database
 * of the same name in the old cluster and, if there is one, verify that the
 * two have compatible locale and encoding settings.
 */
static void
check_databases_are_compatible(void)
{
	int			new_idx;

	for (new_idx = 0; new_idx < new_cluster.dbarr.ndbs; new_idx++)
	{
		DbInfo	   *newdb = &new_cluster.dbarr.dbs[new_idx];
		int			old_idx = 0;

		/* Scan the old cluster for the first database with the same name */
		while (old_idx < old_cluster.dbarr.ndbs &&
			   strcmp(newdb->db_name,
					  old_cluster.dbarr.dbs[old_idx].db_name) != 0)
			old_idx++;

		/* Databases that exist only in the new cluster are left unchecked */
		if (old_idx < old_cluster.dbarr.ndbs)
			check_locale_and_encoding(&old_cluster.dbarr.dbs[old_idx], newdb);
	}
}
/*
* A previous run of pg_upgrade might have failed and the new cluster
* directory recreated, but they might have forgotten to remove
@ -1524,41 +1402,3 @@ check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
else
check_ok();
}
/*
 * get_canonical_locale_name
 *
 * Ask the system for its spelling of a locale name by temporarily switching
 * the given category to it, in the hope that what comes back is canonical.
 * This should match the backend's check_locale() function.
 *
 * The result is a pg_strdup'd copy.  The category's previous setting is
 * restored before returning; any setlocale() failure is reported through
 * pg_fatal().
 */
static char *
get_canonical_locale_name(int category, const char *locale)
{
	char	   *current;
	char	   *previous;
	char	   *accepted;
	char	   *canonical;

	/* Remember the setting currently in effect so it can be put back. */
	current = setlocale(category, NULL);
	if (current == NULL)
		pg_fatal("failed to get the current locale");

	/* The returned string may be scratch space that gets reused; copy it. */
	previous = pg_strdup(current);

	/* Try the requested locale to see whether the system accepts it. */
	accepted = setlocale(category, locale);
	if (accepted == NULL)
		pg_fatal("failed to get system locale name for \"%s\"", locale);

	canonical = pg_strdup(accepted);

	/* Put the original setting back. */
	if (setlocale(category, previous) == NULL)
		pg_fatal("failed to restore old locale \"%s\"", previous);

	pg_free(previous);

	return canonical;
}

View File

@ -20,6 +20,7 @@ static void create_rel_filename_map(const char *old_data, const char *new_data,
static void report_unmatched_relation(const RelInfo *rel, const DbInfo *db,
bool is_new_db);
static void free_db_and_rel_infos(DbInfoArr *db_arr);
static void get_template0_info(ClusterInfo *cluster);
static void get_db_infos(ClusterInfo *cluster);
static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo);
static void free_rel_infos(RelInfoArr *rel_arr);
@ -278,6 +279,7 @@ get_db_and_rel_infos(ClusterInfo *cluster)
if (cluster->dbarr.dbs != NULL)
free_db_and_rel_infos(&cluster->dbarr);
get_template0_info(cluster);
get_db_infos(cluster);
for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
@ -293,6 +295,55 @@ get_db_and_rel_infos(ClusterInfo *cluster)
}
/*
 * Get information about template0, which will be copied from the old cluster
 * to the new cluster.
 *
 * Connects to the cluster's template1 database, reads template0's row from
 * pg_database, and stores a newly allocated DbLocaleInfo in
 * cluster->template0.  Reports via pg_fatal() if exactly one template0 row
 * is not found.
 */
static void
get_template0_info(ClusterInfo *cluster)
{
	PGconn	   *conn = connectToServer(cluster, "template1");
	DbLocaleInfo *locale;
	PGresult   *dbres;
	int			i_datencoding;
	int			i_datlocprovider;
	int			i_datcollate;
	int			i_datctype;
	int			i_daticulocale;

	/*
	 * pg_database.datlocprovider and daticulocale only exist in servers of
	 * version 15 and later.  For older servers, substitute the values that
	 * describe the only behavior they had: the libc provider ('c') and no
	 * ICU locale.  Keeping the same column aliases lets the code below stay
	 * version-independent.
	 */
	if (PQserverVersion(conn) >= 150000)
		dbres = executeQueryOrDie(conn,
								  "SELECT encoding, datlocprovider, "
								  " datcollate, datctype, daticulocale "
								  "FROM pg_catalog.pg_database "
								  "WHERE datname='template0'");
	else
		dbres = executeQueryOrDie(conn,
								  "SELECT encoding, 'c' AS datlocprovider, "
								  " datcollate, datctype, NULL AS daticulocale "
								  "FROM pg_catalog.pg_database "
								  "WHERE datname='template0'");

	if (PQntuples(dbres) != 1)
		pg_fatal("template0 not found");

	locale = pg_malloc(sizeof(DbLocaleInfo));

	i_datencoding = PQfnumber(dbres, "encoding");
	i_datlocprovider = PQfnumber(dbres, "datlocprovider");
	i_datcollate = PQfnumber(dbres, "datcollate");
	i_datctype = PQfnumber(dbres, "datctype");
	i_daticulocale = PQfnumber(dbres, "daticulocale");

	locale->db_encoding = atoi(PQgetvalue(dbres, 0, i_datencoding));
	/* the provider is a single character; take the first byte of the text */
	locale->db_collprovider = PQgetvalue(dbres, 0, i_datlocprovider)[0];
	locale->db_collate = pg_strdup(PQgetvalue(dbres, 0, i_datcollate));
	locale->db_ctype = pg_strdup(PQgetvalue(dbres, 0, i_datctype));

	/* a SQL NULL ICU locale is represented as a NULL pointer */
	if (PQgetisnull(dbres, 0, i_daticulocale))
		locale->db_iculocale = NULL;
	else
		locale->db_iculocale = pg_strdup(PQgetvalue(dbres, 0, i_daticulocale));

	cluster->template0 = locale;

	PQclear(dbres);
	PQfinish(conn);
}
/*
* get_db_infos()
*
@ -309,11 +360,6 @@ get_db_infos(ClusterInfo *cluster)
DbInfo *dbinfos;
int i_datname,
i_oid,
i_encoding,
i_datcollate,
i_datctype,
i_datlocprovider,
i_daticulocale,
i_spclocation;
char query[QUERY_ALLOC];
@ -337,11 +383,6 @@ get_db_infos(ClusterInfo *cluster)
i_oid = PQfnumber(res, "oid");
i_datname = PQfnumber(res, "datname");
i_encoding = PQfnumber(res, "encoding");
i_datcollate = PQfnumber(res, "datcollate");
i_datctype = PQfnumber(res, "datctype");
i_datlocprovider = PQfnumber(res, "datlocprovider");
i_daticulocale = PQfnumber(res, "daticulocale");
i_spclocation = PQfnumber(res, "spclocation");
ntups = PQntuples(res);
@ -351,14 +392,6 @@ get_db_infos(ClusterInfo *cluster)
{
dbinfos[tupnum].db_oid = atooid(PQgetvalue(res, tupnum, i_oid));
dbinfos[tupnum].db_name = pg_strdup(PQgetvalue(res, tupnum, i_datname));
dbinfos[tupnum].db_encoding = atoi(PQgetvalue(res, tupnum, i_encoding));
dbinfos[tupnum].db_collate = pg_strdup(PQgetvalue(res, tupnum, i_datcollate));
dbinfos[tupnum].db_ctype = pg_strdup(PQgetvalue(res, tupnum, i_datctype));
dbinfos[tupnum].db_collprovider = PQgetvalue(res, tupnum, i_datlocprovider)[0];
if (PQgetisnull(res, tupnum, i_daticulocale))
dbinfos[tupnum].db_iculocale = NULL;
else
dbinfos[tupnum].db_iculocale = pg_strdup(PQgetvalue(res, tupnum, i_daticulocale));
snprintf(dbinfos[tupnum].db_tablespace, sizeof(dbinfos[tupnum].db_tablespace), "%s",
PQgetvalue(res, tupnum, i_spclocation));
}

View File

@ -38,6 +38,7 @@ tests += {
'sd': meson.current_source_dir(),
'bd': meson.current_build_dir(),
'tap': {
'env': {'with_icu': icu.found() ? 'yes' : 'no'},
'tests': [
't/001_basic.pl',
't/002_pg_upgrade.pl',

View File

@ -51,6 +51,7 @@
#include "fe_utils/string_utils.h"
#include "pg_upgrade.h"
static void set_locale_and_encoding(void);
static void prepare_new_cluster(void);
static void prepare_new_globals(void);
static void create_new_objects(void);
@ -139,6 +140,8 @@ main(int argc, char **argv)
"Performing Upgrade\n"
"------------------");
set_locale_and_encoding();
prepare_new_cluster();
stop_postmaster(false);
@ -366,6 +369,65 @@ setup(char *argv0, bool *live_check)
}
/*
 * Copy locale and encoding information into the new cluster's template0.
 *
 * We need to copy the encoding, datlocprovider, datcollate, datctype, and
 * daticulocale. We don't need datcollversion because that's never set for
 * template0.
 *
 * The values come from old_cluster.template0 and are written with an UPDATE
 * on pg_catalog.pg_database issued over a connection to the new cluster's
 * template1 database.
 */
static void
set_locale_and_encoding(void)
{
	PGconn	   *conn_new_template1;
	char	   *datcollate_literal;
	char	   *datctype_literal;
	char	   *daticulocale_literal = NULL;
	const char *daticulocale_sql = "NULL";
	DbLocaleInfo *locale = old_cluster.template0;

	prep_status("Setting locale and encoding for new cluster");

	/* escape literals with respect to new cluster */
	conn_new_template1 = connectToServer(&new_cluster, "template1");

	datcollate_literal = PQescapeLiteral(conn_new_template1,
										 locale->db_collate,
										 strlen(locale->db_collate));
	if (datcollate_literal == NULL)
		pg_fatal("could not escape string: %s",
				 PQerrorMessage(conn_new_template1));

	datctype_literal = PQescapeLiteral(conn_new_template1,
									   locale->db_ctype,
									   strlen(locale->db_ctype));
	if (datctype_literal == NULL)
		pg_fatal("could not escape string: %s",
				 PQerrorMessage(conn_new_template1));

	/*
	 * Only memory obtained from libpq may be handed to PQfreemem(), so the
	 * SQL keyword NULL is kept as a constant string rather than being
	 * allocated with pg_strdup() and later released through libpq.
	 */
	if (locale->db_iculocale)
	{
		daticulocale_literal = PQescapeLiteral(conn_new_template1,
											   locale->db_iculocale,
											   strlen(locale->db_iculocale));
		if (daticulocale_literal == NULL)
			pg_fatal("could not escape string: %s",
					 PQerrorMessage(conn_new_template1));
		daticulocale_sql = daticulocale_literal;
	}

	/* update template0 in new cluster */
	PQclear(executeQueryOrDie(conn_new_template1,
							  "UPDATE pg_catalog.pg_database "
							  " SET encoding = %d, "
							  " datlocprovider = '%c', "
							  " datcollate = %s, "
							  " datctype = %s, "
							  " daticulocale = %s "
							  " WHERE datname = 'template0' ",
							  locale->db_encoding,
							  locale->db_collprovider,
							  datcollate_literal,
							  datctype_literal,
							  daticulocale_sql));

	PQfreemem(datcollate_literal);
	PQfreemem(datctype_literal);
	if (daticulocale_literal != NULL)
		PQfreemem(daticulocale_literal);

	PQfinish(conn_new_template1);

	check_ok();
}
static void
prepare_new_cluster(void)
{

View File

@ -175,13 +175,20 @@ typedef struct
char *db_name; /* database name */
char db_tablespace[MAXPGPATH]; /* database default tablespace
* path */
RelInfoArr rel_arr; /* array of all user relinfos */
} DbInfo;
/*
* Locale information about a database.
*/
typedef struct
{
char *db_collate;
char *db_ctype;
char db_collprovider;
char *db_iculocale;
int db_encoding;
RelInfoArr rel_arr; /* array of all user relinfos */
} DbInfo;
} DbLocaleInfo;
typedef struct
{
@ -252,6 +259,7 @@ typedef enum
typedef struct
{
ControlData controldata; /* pg_control information */
DbLocaleInfo *template0; /* template0 locale info */
DbInfoArr dbarr; /* dbinfos array */
char *pgdata; /* pathname for cluster's $PGDATA directory */
char *pgconfig; /* pathname for cluster's config file

View File

@ -90,15 +90,58 @@ my $oldnode =
PostgreSQL::Test::Cluster->new('old_node',
install_path => $ENV{oldinstall});
my %node_params = ();
# To increase coverage of non-standard segment size and group access without
# increasing test runtime, run these tests with a custom setting.
# --allow-group-access and --wal-segsize have been added in v11.
my %node_params = ();
$node_params{extra} = [ '--wal-segsize', '1', '--allow-group-access' ]
if $oldnode->pg_version >= 11;
my @custom_opts = ();
if ($oldnode->pg_version >= 11)
{
push @custom_opts, ('--wal-segsize', '1');
push @custom_opts, '--allow-group-access';
}
# Set up the locale settings for the original cluster, so that we
# can test that pg_upgrade copies the locale settings of template0
# from the old to the new cluster.
my $original_encoding = "6"; # UTF-8
my $original_provider = "c";
my $original_collate = "C";
my $original_iculocale = "";
if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
{
$original_provider = "i";
$original_iculocale = "fr-CA";
}
my @initdb_params = @custom_opts;
push @initdb_params, ('--encoding', 'UTF-8');
push @initdb_params, ('--lc-collate', $original_collate);
if ($original_provider eq "i")
{
push @initdb_params, ('--locale-provider', 'icu');
push @initdb_params, ('--icu-locale', 'fr-CA');
}
$node_params{extra} = \@initdb_params;
$oldnode->init(%node_params);
$oldnode->start;
my $result;
$result = $oldnode->safe_psql(
'postgres', q{SELECT encoding, datlocprovider, datcollate, daticulocale
FROM pg_database WHERE datname='template0'});
is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale",
"check locales in original cluster"
);
# check ctype, which was acquired from environment by initdb
my $original_ctype = $oldnode->safe_psql(
'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'});
# The default location of the source code is the root of this directory.
my $srcdir = abs_path("../../..");
@ -168,6 +211,18 @@ else
# Initialize a new node for the upgrade.
my $newnode = PostgreSQL::Test::Cluster->new('new_node');
# Reset to original parameters.
@initdb_params = @custom_opts;
# The new cluster will be initialized with different locale settings,
# but these settings will be overwritten with those of the original
# cluster.
push @initdb_params, ('--encoding', 'SQL_ASCII');
push @initdb_params, ('--locale-provider', 'libc');
push @initdb_params, ('--lc-ctype', 'C');
$node_params{extra} = \@initdb_params;
$newnode->init(%node_params);
my $newbindir = $newnode->config_data('--bindir');
@ -338,6 +393,14 @@ if (-d $log_path)
}
}
# Test that upgraded cluster has original locale settings.
$result = $newnode->safe_psql(
'postgres', q{SELECT encoding, datlocprovider, datcollate, datctype, daticulocale
FROM pg_database WHERE datname='template0'});
is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale",
"check that locales in new cluster match original cluster"
);
# Second dump from the upgraded instance.
@dump_command = (
'pg_dumpall', '--no-sync', '-d', $newnode->connstr('postgres'),