From 404946d40109fb247f51b553ac00198765c6c1cb Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 29 Apr 2021 15:24:37 -0400
Subject: [PATCH] Fix some more omissions in pg_upgrade's tests for
 non-upgradable types.

Commits 29aeda6e4 et al closed up some oversights involving not checking
for non-upgradable types within container types, such as arrays and
ranges.  However, I only looked at version.c, failing to notice that
there were substantially-equivalent tests in check.c.  (The division
of responsibility between those files is less than clear...)

In addition, because genbki.pl does not guarantee that auto-generated
rowtype OIDs will hold still across versions, we need to consider that
the composite type associated with a system catalog or view is
non-upgradable.  It seems unlikely that someone would have a user
column declared that way, but if they did, trying to read it in another
PG version would likely draw "no such pg_type OID" failures, thanks
to the type OID embedded in composite Datums.

To support the composite and reg*-type cases, extend the recursive
query that does the search to allow any base query that returns
a column of pg_type OIDs, rather than limiting it to exactly one
starting type.

As before, back-patch to all supported branches.

Discussion: https://postgr.es/m/2798740.1619622555@sss.pgh.pa.us
---
 src/bin/pg_upgrade/check.c      | 222 ++++++++++++--------------------
 src/bin/pg_upgrade/pg_upgrade.h |   6 +
 src/bin/pg_upgrade/version.c    |  53 ++++++--
 3 files changed, 133 insertions(+), 148 deletions(-)

diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 4a94890b32..fbf541712e 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -23,6 +23,7 @@ static void check_is_install_user(ClusterInfo *cluster);
 static void check_proper_datallowconn(ClusterInfo *cluster);
 static void check_for_prepared_transactions(ClusterInfo *cluster);
 static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster);
+static void check_for_composite_data_type_usage(ClusterInfo *cluster);
 static void check_for_reg_data_type_usage(ClusterInfo *cluster);
 static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
 static void check_for_pg_role_prefix(ClusterInfo *cluster);
@@ -98,6 +99,7 @@ check_and_dump_old_cluster(bool live_check)
 	check_is_install_user(&old_cluster);
 	check_proper_datallowconn(&old_cluster);
 	check_for_prepared_transactions(&old_cluster);
+	check_for_composite_data_type_usage(&old_cluster);
 	check_for_reg_data_type_usage(&old_cluster);
 	check_for_isn_and_int8_passing_mismatch(&old_cluster);
 
@@ -897,6 +899,63 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster)
 }
 
 
+/*
+ * check_for_composite_data_type_usage()
+ *	Check for system-defined composite types used in user tables.
+ *
+ *	The OIDs of rowtypes of system catalogs and information_schema views
+ *	can change across major versions; unlike user-defined types, we have
+ *	no mechanism for forcing them to be the same in the new cluster.
+ *	Hence, if any user table uses one, that's problematic for pg_upgrade.
+ */
+static void
+check_for_composite_data_type_usage(ClusterInfo *cluster)
+{
+	bool		found;
+	Oid			firstUserOid;
+	char		output_path[MAXPGPATH];
+	char	   *base_query;
+
+	prep_status("Checking for system-defined composite types in user tables");
+
+	snprintf(output_path, sizeof(output_path), "tables_using_composite.txt");
+
+	/*
+	 * Look for composite types that were made during initdb *or* belong to
+	 * information_schema; that's important in case information_schema was
+	 * dropped and reloaded.
+	 *
+	 * The cutoff OID here should match the source cluster's value of
+	 * FirstNormalObjectId.  We hardcode it rather than using that C #define
+	 * because, if that #define is ever changed, our own version's value is
+	 * NOT what to use.  Eventually we may need a test on the source cluster's
+	 * version to select the correct value.
+	 */
+	firstUserOid = 16384;
+
+	base_query = psprintf("SELECT t.oid FROM pg_catalog.pg_type t "
+						  "LEFT JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid "
+						  " WHERE typtype = 'c' AND (t.oid < %u OR nspname = 'information_schema')",
+						  firstUserOid);
+
+	found = check_for_data_types_usage(cluster, base_query, output_path);
+
+	free(base_query);
+
+	if (found)
+	{
+		pg_log(PG_REPORT, "fatal\n");
+		pg_fatal("Your installation contains system-defined composite type(s) in user tables.\n"
+				 "These type OIDs are not stable across PostgreSQL versions,\n"
+				 "so this cluster cannot currently be upgraded.  You can\n"
+				 "drop the problem columns and restart the upgrade.\n"
+				 "A list of the problem columns is in the file:\n"
+				 "    %s\n\n", output_path);
+	}
+	else
+		check_ok();
+}
+
 /*
  * check_for_reg_data_type_usage()
  *	pg_upgrade only preserves these system values:
@@ -911,87 +970,36 @@ check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster)
 static void
 check_for_reg_data_type_usage(ClusterInfo *cluster)
 {
-	int			dbnum;
-	FILE	   *script = NULL;
-	bool		found = false;
+	bool		found;
 	char		output_path[MAXPGPATH];
 
 	prep_status("Checking for reg* data types in user tables");
 
 	snprintf(output_path, sizeof(output_path), "tables_using_reg.txt");
 
-	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
-	{
-		PGresult   *res;
-		bool		db_used = false;
-		int			ntups;
-		int			rowno;
-		int			i_nspname,
-					i_relname,
-					i_attname;
-		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
-		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
-
-		/*
-		 * While several relkinds don't store any data, e.g. views, they can
-		 * be used to define data types of other columns, so we check all
-		 * relkinds.
-		 */
-		res = executeQueryOrDie(conn,
-								"SELECT n.nspname, c.relname, a.attname "
-								"FROM	pg_catalog.pg_class c, "
-								"		pg_catalog.pg_namespace n, "
-								"		pg_catalog.pg_attribute a, "
-								"		pg_catalog.pg_type t "
-								"WHERE	c.oid = a.attrelid AND "
-								"		NOT a.attisdropped AND "
-								"       a.atttypid = t.oid AND "
-								"       t.typnamespace = "
-								"           (SELECT oid FROM pg_namespace "
-								"            WHERE nspname = 'pg_catalog') AND"
-								"		t.typname IN ( "
-		/* regclass.oid is preserved, so 'regclass' is OK */
-								"           'regconfig', "
-								"           'regdictionary', "
-								"           'regnamespace', "
-								"           'regoper', "
-								"           'regoperator', "
-								"           'regproc', "
-								"           'regprocedure' "
-		/* regrole.oid is preserved, so 'regrole' is OK */
-		/* regtype.oid is preserved, so 'regtype' is OK */
-								"			) AND "
-								"		c.relnamespace = n.oid AND "
-								"		n.nspname NOT IN ('pg_catalog', 'information_schema')");
-
-		ntups = PQntuples(res);
-		i_nspname = PQfnumber(res, "nspname");
-		i_relname = PQfnumber(res, "relname");
-		i_attname = PQfnumber(res, "attname");
-		for (rowno = 0; rowno < ntups; rowno++)
-		{
-			found = true;
-			if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
-				pg_fatal("could not open file \"%s\": %s\n",
-						 output_path, strerror(errno));
-			if (!db_used)
-			{
-				fprintf(script, "Database: %s\n", active_db->db_name);
-				db_used = true;
-			}
-			fprintf(script, "  %s.%s.%s\n",
-					PQgetvalue(res, rowno, i_nspname),
-					PQgetvalue(res, rowno, i_relname),
-					PQgetvalue(res, rowno, i_attname));
-		}
-
-		PQclear(res);
-
-		PQfinish(conn);
-	}
-
-	if (script)
-		fclose(script);
+	/*
+	 * Note: older servers will not have all of these reg* types, so we have
+	 * to write the query like this rather than depending on casts to regtype.
+	 */
+	found = check_for_data_types_usage(cluster,
+									   "SELECT oid FROM pg_catalog.pg_type t "
+									   "WHERE t.typnamespace = "
+									   "        (SELECT oid FROM pg_catalog.pg_namespace "
+									   "         WHERE nspname = 'pg_catalog') "
+									   "  AND t.typname IN ( "
+	/* pg_class.oid is preserved, so 'regclass' is OK */
+									   "           'regcollation', "
+									   "           'regconfig', "
+									   "           'regdictionary', "
+									   "           'regnamespace', "
+									   "           'regoper', "
+									   "           'regoperator', "
+									   "           'regproc', "
+									   "           'regprocedure' "
+	/* pg_authid.oid is preserved, so 'regrole' is OK */
+	/* pg_type.oid is (mostly) preserved, so 'regtype' is OK */
+									   "         )",
+									   output_path);
 
 	if (found)
 	{
@@ -1016,75 +1024,13 @@ check_for_reg_data_type_usage(ClusterInfo *cluster)
 static void
 check_for_jsonb_9_4_usage(ClusterInfo *cluster)
 {
-	int			dbnum;
-	FILE	   *script = NULL;
-	bool		found = false;
 	char		output_path[MAXPGPATH];
 
 	prep_status("Checking for incompatible \"jsonb\" data type");
 
 	snprintf(output_path, sizeof(output_path), "tables_using_jsonb.txt");
 
-	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
-	{
-		PGresult   *res;
-		bool		db_used = false;
-		int			ntups;
-		int			rowno;
-		int			i_nspname,
-					i_relname,
-					i_attname;
-		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
-		PGconn	   *conn = connectToServer(cluster, active_db->db_name);
-
-		/*
-		 * While several relkinds don't store any data, e.g. views, they can
-		 * be used to define data types of other columns, so we check all
-		 * relkinds.
-		 */
-		res = executeQueryOrDie(conn,
-								"SELECT n.nspname, c.relname, a.attname "
-								"FROM	pg_catalog.pg_class c, "
-								"		pg_catalog.pg_namespace n, "
-								"		pg_catalog.pg_attribute a "
-								"WHERE	c.oid = a.attrelid AND "
-								"		NOT a.attisdropped AND "
-								"		a.atttypid = 'pg_catalog.jsonb'::pg_catalog.regtype AND "
-								"		c.relnamespace = n.oid AND "
-		/* exclude possible orphaned temp tables */
-								"		n.nspname !~ '^pg_temp_' AND "
-								"		n.nspname NOT IN ('pg_catalog', 'information_schema')");
-
-		ntups = PQntuples(res);
-		i_nspname = PQfnumber(res, "nspname");
-		i_relname = PQfnumber(res, "relname");
-		i_attname = PQfnumber(res, "attname");
-		for (rowno = 0; rowno < ntups; rowno++)
-		{
-			found = true;
-			if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
-				pg_fatal("could not open file \"%s\": %s\n",
-						 output_path, strerror(errno));
-			if (!db_used)
-			{
-				fprintf(script, "Database: %s\n", active_db->db_name);
-				db_used = true;
-			}
-			fprintf(script, "  %s.%s.%s\n",
-					PQgetvalue(res, rowno, i_nspname),
-					PQgetvalue(res, rowno, i_relname),
-					PQgetvalue(res, rowno, i_attname));
-		}
-
-		PQclear(res);
-
-		PQfinish(conn);
-	}
-
-	if (script)
-		fclose(script);
-
-	if (found)
+	if (check_for_data_type_usage(cluster, "pg_catalog.jsonb", output_path))
 	{
 		pg_log(PG_REPORT, "fatal\n");
 		pg_fatal("Your installation contains the \"jsonb\" data type in user tables.\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 7e5e971294..7c5a02a26c 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -441,6 +441,12 @@ void		pg_putenv(const char *var, const char *val);
 
 /* version.c */
 
+bool		check_for_data_types_usage(ClusterInfo *cluster,
+									   const char *base_query,
+									   const char *output_path);
+bool		check_for_data_type_usage(ClusterInfo *cluster,
+									  const char *typename,
+									  const char *output_path);
 void new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster,
 										 bool check_mode);
 void		old_9_3_check_for_line_data_type_usage(ClusterInfo *cluster);
diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c
index a0f6fbac18..5a66576704 100644
--- a/src/bin/pg_upgrade/version.c
+++ b/src/bin/pg_upgrade/version.c
@@ -100,17 +100,22 @@ new_9_0_populate_pg_largeobject_metadata(ClusterInfo *cluster, bool check_mode)
 
 
 /*
- * check_for_data_type_usage
- *	Detect whether there are any stored columns depending on the given type
+ * check_for_data_types_usage()
+ *	Detect whether there are any stored columns depending on given type(s)
  *
  * If so, write a report to the given file name, and return true.
  *
- * We check for the type in tables, matviews, and indexes, but not views;
+ * base_query should be a SELECT yielding a single column named "oid",
+ * containing the pg_type OIDs of one or more types that are known to have
+ * inconsistent on-disk representations across server versions.
+ *
+ * We check for the type(s) in tables, matviews, and indexes, but not views;
  * there's no storage involved in a view.
  */
-static bool
-check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
-						  char *output_path)
+bool
+check_for_data_types_usage(ClusterInfo *cluster,
+						   const char *base_query,
+						   const char *output_path)
 {
 	bool		found = false;
 	FILE	   *script = NULL;
@@ -130,7 +135,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
 					i_attname;
 
 		/*
-		 * The type of interest might be wrapped in a domain, array,
+		 * The type(s) of interest might be wrapped in a domain, array,
 		 * composite, or range, and these container types can be nested (to
 		 * varying extents depending on server version, but that's not of
 		 * concern here).  To handle all these cases we need a recursive CTE.
@@ -138,8 +143,8 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
 		initPQExpBuffer(&querybuf);
 		appendPQExpBuffer(&querybuf,
 						  "WITH RECURSIVE oids AS ( "
-		/* the target type itself */
-						  "	SELECT '%s'::pg_catalog.regtype AS oid "
+		/* start with the type(s) returned by base_query */
+						  "	%s "
 						  "	UNION ALL "
 						  "	SELECT * FROM ( "
 		/* inner WITH because we can only reference the CTE once */
@@ -157,7 +162,7 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
 						  "				  c.oid = a.attrelid AND "
 						  "				  NOT a.attisdropped AND "
 						  "				  a.atttypid = x.oid ",
-						  typename);
+						  base_query);
 
 		/* Ranges came in in 9.2 */
 		if (GET_MAJOR_VERSION(cluster->major_version) >= 902)
@@ -225,6 +230,34 @@ check_for_data_type_usage(ClusterInfo *cluster, const char *typename,
 	return found;
 }
 
+/*
+ * check_for_data_type_usage()
+ *	Detect whether there are any stored columns depending on the given type
+ *
+ * If so, write a report to the given file name, and return true.
+ *
+ * typename should be a fully qualified type name.  This is just a
+ * trivial wrapper around check_for_data_types_usage() to convert a
+ * type name into a base query.
+ */
+bool
+check_for_data_type_usage(ClusterInfo *cluster,
+						  const char *typename,
+						  const char *output_path)
+{
+	bool		found;
+	char	   *base_query;
+
+	base_query = psprintf("SELECT '%s'::pg_catalog.regtype AS oid",
+						  typename);
+
+	found = check_for_data_types_usage(cluster, base_query, output_path);
+
+	free(base_query);
+
+	return found;
+}
+
 
 /*
  * old_9_3_check_for_line_data_type_usage()