From 404946d40109fb247f51b553ac00198765c6c1cb Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 29 Apr 2021 15:24:37 -0400 Subject: [PATCH] Fix some more omissions in pg_upgrade's tests for non-upgradable types. Commits 29aeda6e4 et al closed up some oversights involving not checking for non-upgradable types within container types, such as arrays and ranges. However, I only looked at version.c, failing to notice that there were substantially-equivalent tests in check.c. (The division of responsibility between those files is less than clear...) In addition, because genbki.pl does not guarantee that auto-generated rowtype OIDs will hold still across versions, we need to consider that the composite type associated with a system catalog or view is non-upgradable. It seems unlikely that someone would have a user column declared that way, but if they did, trying to read it in another PG version would likely draw "no such pg_type OID" failures, thanks to the type OID embedded in composite Datums. To support the composite and reg*-type cases, extend the recursive query that does the search to allow any base query that returns a column of pg_type OIDs, rather than limiting it to exactly one starting type. As before, back-patch to all supported branches. Discussion: https://postgr.es/m/2798740.1619622555@sss.pgh.pa.us --- src/bin/pg_upgrade/check.c | 222 ++++++++++++-------------------- src/bin/pg_upgrade/pg_upgrade.h | 6 + src/bin/pg_upgrade/version.c | 53 ++++++-- 3 files changed, 133 insertions(+), 148 deletions(-) diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 4a94890b32..fbf541712e 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -23,6 +23,7 @@ static void check_is_install_user(ClusterInfo *cluster); static void check_proper_datallowconn(ClusterInfo *cluster); static void check_for_prepared_transactions(ClusterInfo *cluster); static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster); +static void check_for_composite_data_type_usage(ClusterInfo *cluster); static void check_for_reg_data_type_usage(ClusterInfo *cluster); static void check_for_jsonb_9_4_usage(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); @@ -98,6 +99,7 @@ check_and_dump_old_cluster(bool live_check) check_is_install_user(&old_cluster); check_proper_datallowconn(&old_cluster); check_for_prepared_transactions(&old_cluster); + check_for_composite_data_type_usage(&old_cluster); check_for_reg_data_type_usage(&old_cluster); check_for_isn_and_int8_passing_mismatch(&old_cluster); @@ -897,6 +899,63 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster) } +/* + * check_for_composite_data_type_usage() + * Check for system-defined composite types used in user tables. + * + * The OIDs of rowtypes of system catalogs and information_schema views + * can change across major versions; unlike user-defined types, we have + * no mechanism for forcing them to be the same in the new cluster. + * Hence, if any user table uses one, that's problematic for pg_upgrade. + */ +static void +check_for_composite_data_type_usage(ClusterInfo *cluster) +{ + bool found; + Oid firstUserOid; + char output_path[MAXPGPATH]; + char *base_query; + + prep_status("Checking for system-defined composite types in user tables"); + + snprintf(output_path, sizeof(output_path), "tables_using_composite.txt"); + + /* + * Look for composite types that were made during initdb *or* belong to + * information_schema; that's important in case information_schema was + * dropped and reloaded. + * + * The cutoff OID here should match the source cluster's value of + * FirstNormalObjectId. We hardcode it rather than using that C #define + * because, if that #define is ever changed, our own version's value is + * NOT what to use. Eventually we may need a test on the source cluster's + * version to select the correct value. + */ + firstUserOid = 16384; + + base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t " + "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid " + " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')", + firstUserOid); + + found = check_for_data_types_usage(cluster, base_query, output_path); + + free(base_query); + + if (found) + { + pg_log(PG_REPORT, "fatal\n"); + pg_fatal("Your installation contains system-defined composite type(s) in user tables.\n" + "These type OIDs are not stable across PostgreSQL versions,\n" + "so this cluster cannot currently be upgraded. You can\n" + "drop the problem columns and restart the upgrade.\n" + "A list of the problem columns is in the file:\n" + " %s\n\n", output_path); + } + else + check_ok(); +} + /* * check_for_reg_data_type_usage() * pg_upgrade only preserves these system values: @@ -911,87 +970,36 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster) static void check_for_reg_data_type_usage(ClusterInfo *cluster) { - int dbnum; - FILE *script = NULL; - bool found = false; + bool found; char output_path[MAXPGPATH]; prep_status("Checking for reg* data types in user tables"); snprintf(output_path, sizeof(output_path), "tables_using_reg.txt"); - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - - /* - * While several relkinds don't store any data, e.g. views, they can - * be used to define data types of other columns, so we check all - * relkinds. - */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a, " - " pg_catalog.pg_type t " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = t.oid AND " - " t.typnamespace = " - " (SELECT oid FROM pg_namespace " - " WHERE nspname = 'pg_catalog') AND" - " t.typname IN ( " - /* regclass.oid is preserved, so 'regclass' is OK */ - " 'regconfig', " - " 'regdictionary', " - " 'regnamespace', " - " 'regoper', " - " 'regoperator', " - " 'regproc', " - " 'regprocedure' " - /* regrole.oid is preserved, so 'regrole' is OK */ - /* regtype.oid is preserved, so 'regtype' is OK */ - " ) AND " - " c.relnamespace = n.oid AND " - " n.nspname NOT IN ('pg_catalog', 'information_schema')"); - - ntups = PQntuples(res); - i_nspname = PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, "relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %s\n", - output_path, strerror(errno)); - if (!db_used) - { - fprintf(script, "Database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - PQfinish(conn); - } - - if (script) - fclose(script); + /* + * Note: older servers will not have all of these reg* types, so we have + * to write the query like this rather than depending on casts to regtype. + */ + found = check_for_data_types_usage(cluster, + "SELECT oid FROM pg_catalog.pg_type t " + "WHERE t.typnamespace = " + " (SELECT oid FROM pg_catalog.pg_namespace " + " WHERE nspname = 'pg_catalog') " + " AND t.typname IN ( " + /* pg_class.oid is preserved, so 'regclass' is OK */ + " 'regcollation', " + " 'regconfig', " + " 'regdictionary', " + " 'regnamespace', " + " 'regoper', " + " 'regoperator', " + " 'regproc', " + " 'regprocedure' " + /* pg_authid.oid is preserved, so 'regrole' is OK */ + /* pg_type.oid is (mostly) preserved, so 'regtype' is OK */ + " )", + output_path); if (found) { @@ -1016,75 +1024,13 @@ check_for_reg_data_type_usage(ClusterInfo *cluster) static void check_for_jsonb_9_4_usage(ClusterInfo *cluster) { - int dbnum; - FILE *script = NULL; - bool found = false; char output_path[MAXPGPATH]; prep_status("Checking for incompatible \"jsonb\" data type"); snprintf(output_path, sizeof(output_path), "tables_using_jsonb.txt"); - for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) - { - PGresult *res; - bool db_used = false; - int ntups; - int rowno; - int i_nspname, - i_relname, - i_attname; - DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; - PGconn *conn = connectToServer(cluster, active_db->db_name); - - /* - * While several relkinds don't store any data, e.g. views, they can - * be used to define data types of other columns, so we check all - * relkinds. - */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname, a.attname " - "FROM pg_catalog.pg_class c, " - " pg_catalog.pg_namespace n, " - " pg_catalog.pg_attribute a " - "WHERE c.oid = a.attrelid AND " - " NOT a.attisdropped AND " - " a.atttypid = 'pg_catalog.jsonb'::pg_catalog.regtype AND " - " c.relnamespace = n.oid AND " - /* exclude possible orphaned temp tables */ - " n.nspname !~ '^pg_temp_' AND " - " n.nspname NOT IN ('pg_catalog', 'information_schema')"); - - ntups = PQntuples(res); - i_nspname = PQfnumber(res, "nspname"); - i_relname = PQfnumber(res, "relname"); - i_attname = PQfnumber(res, "attname"); - for (rowno = 0; rowno < ntups; rowno++) - { - found = true; - if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) - pg_fatal("could not open file \"%s\": %s\n", - output_path, strerror(errno)); - if (!db_used) - { - fprintf(script, "Database: %s\n", active_db->db_name); - db_used = true; - } - fprintf(script, " %s.%s.%s\n", - PQgetvalue(res, rowno, i_nspname), - PQgetvalue(res, rowno, i_relname), - PQgetvalue(res, rowno, i_attname)); - } - - PQclear(res); - - PQfinish(conn); - } - - if (script) - fclose(script); - - if (found) + if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path)) { pg_log(PG_REPORT, "fatal\n"); pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n" diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 7e5e971294..7c5a02a26c 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -441,6 +441,12 @@ void pg_putenv(const char *var, const char *val); /* version.c */ +bool check_for_data_types_usage(ClusterInfo *cluster, + const char *base_query, + const char *output_path); +bool check_for_data_type_usage(ClusterInfo *cluster, + const char *typename, + const char *output_path); void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster, bool check_mode); void old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster); diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index a0f6fbac18..5a66576704 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -100,17 +100,22 @@ new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster, bool check_mode) /* - * check_for_data_type_usage - * Detect whether there are any stored columns depending on the given type + * check_for_data_types_usage() + * Detect whether there are any stored columns depending on given type(s) * * If so, write a report to the given file name, and return true. * - * We check for the type in tables, matviews, and indexes, but not views; + * base_query should be a SELECT yielding a single column named "oid", + * containing the pg_type OIDs of one or more types that are known to have + * inconsistent on-disk representations across server versions. + * + * We check for the type(s) in tables, matviews, and indexes, but not views; * there's no storage involved in a view. */ -static bool -check_for_data_type_usage(ClusterInfo *cluster, const char *typename, - char *output_path) +bool +check_for_data_types_usage(ClusterInfo *cluster, + const char *base_query, + const char *output_path) { bool found = false; FILE *script = NULL; @@ -130,7 +135,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename, i_attname; /* - * The type of interest might be wrapped in a domain, array, + * The type(s) of interest might be wrapped in a domain, array, * composite, or range, and these container types can be nested (to * varying extents depending on server version, but that's not of * concern here). To handle all these cases we need a recursive CTE. @@ -138,8 +143,8 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename, initPQExpBuffer(&querybuf); appendPQExpBuffer(&querybuf, "WITH RECURSIVE oids AS ( " - /* the target type itself */ - " SELECT '%s'::pg_catalog.regtype AS oid " + /* start with the type(s) returned by base_query */ + " %s " " UNION ALL " " SELECT * FROM ( " /* inner WITH because we can only reference the CTE once */ @@ -157,7 +162,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename, " c.oid = a.attrelid AND " " NOT a.attisdropped AND " " a.atttypid = x.oid ", - typename); + base_query); /* Ranges came in in 9.2 */ if (GET_MAJOR_VERSION(cluster->major_version) >= 902) @@ -225,6 +230,34 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename, return found; } +/* + * check_for_data_type_usage() + * Detect whether there are any stored columns depending on the given type + * + * If so, write a report to the given file name, and return true. + * + * typename should be a fully qualified type name. This is just a + * trivial wrapper around check_for_data_types_usage() to convert a + * type name into a base query. + */ +bool +check_for_data_type_usage(ClusterInfo *cluster, + const char *typename, + const char *output_path) +{ + bool found; + char *base_query; + + base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid", + typename); + + found = check_for_data_types_usage(cluster, base_query, output_path); + + free(base_query); + + return found; +} + /* * old_9_3_check_for_line_data_type_usage()