Improve pg_upgrade's report about failure to match up old and new tables.

Ordinarily, pg_upgrade shouldn't have any difficulty in matching up all
the relations it sees in the old and new databases.  If it does, however,
it just goes belly-up with a pretty unhelpful error message.  That seemed
fine as long as we expected the case never to occur in the wild, but
Alvaro reported that it had been seen in a database whose pg_largeobject
table had somehow acquired a TOAST table.  That doesn't quite seem like
a case that pg_upgrade actually needs to handle, but it would be good if
the report were more diagnosable.  Hence, extend the logic to print out
as much information as we can about the mismatch(es) before we quit.

In passing, improve the readability of get_rel_infos()'s data collection
query, which had suffered seriously from lets-not-bother-to-update-comments
syndrome, and generally was unnecessarily disrespectful to readers.

It could be argued that this is a bug fix, but given that we have so few
reports, I don't feel a need to back-patch; at least not before this has
baked awhile in HEAD.
This commit is contained in:
Tom Lane 2016-05-06 14:23:45 -04:00
parent 06bd458cb8
commit 73b9952e82
2 changed files with 247 additions and 144 deletions

View File

@ -18,6 +18,8 @@ static void create_rel_filename_map(const char *old_data, const char *new_data,
const DbInfo *old_db, const DbInfo *new_db, const DbInfo *old_db, const DbInfo *new_db,
const RelInfo *old_rel, const RelInfo *new_rel, const RelInfo *old_rel, const RelInfo *new_rel,
FileNameMap *map); FileNameMap *map);
static void report_unmatched_relation(const RelInfo *rel, const DbInfo *db,
bool is_new_db);
static void free_db_and_rel_infos(DbInfoArr *db_arr); static void free_db_and_rel_infos(DbInfoArr *db_arr);
static void get_db_infos(ClusterInfo *cluster); static void get_db_infos(ClusterInfo *cluster);
static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo); static void get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo);
@ -29,99 +31,125 @@ static void print_rel_infos(RelInfoArr *rel_arr);
/* /*
* gen_db_file_maps() * gen_db_file_maps()
* *
* generates database mappings for "old_db" and "new_db". Returns a malloc'ed * generates a database mapping from "old_db" to "new_db".
* array of mappings. nmaps is a return parameter which refers to the number *
* mappings. * Returns a malloc'ed array of mappings. The length of the array
* is returned into *nmaps.
*/ */
FileNameMap * FileNameMap *
gen_db_file_maps(DbInfo *old_db, DbInfo *new_db, gen_db_file_maps(DbInfo *old_db, DbInfo *new_db,
int *nmaps, const char *old_pgdata, const char *new_pgdata) int *nmaps,
const char *old_pgdata, const char *new_pgdata)
{ {
FileNameMap *maps; FileNameMap *maps;
int old_relnum, int old_relnum,
new_relnum; new_relnum;
int num_maps = 0; int num_maps = 0;
bool all_matched = true;
/* There will certainly not be more mappings than there are old rels */
maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) * maps = (FileNameMap *) pg_malloc(sizeof(FileNameMap) *
old_db->rel_arr.nrels); old_db->rel_arr.nrels);
/* /*
* The old database shouldn't have more relations than the new one. We * Each of the RelInfo arrays should be sorted by OID. Scan through them
* force the new cluster to have a TOAST table if the old table had one. * and match them up. If we fail to match everything, we'll abort, but
* first print as much info as we can about mismatches.
*/ */
if (old_db->rel_arr.nrels > new_db->rel_arr.nrels) old_relnum = new_relnum = 0;
pg_fatal("old and new databases \"%s\" have a mismatched number of relations\n", while (old_relnum < old_db->rel_arr.nrels ||
old_db->db_name); new_relnum < new_db->rel_arr.nrels)
/* Drive the loop using new_relnum, which might be higher. */
for (old_relnum = new_relnum = 0; new_relnum < new_db->rel_arr.nrels;
new_relnum++)
{ {
RelInfo *old_rel; RelInfo *old_rel = (old_relnum < old_db->rel_arr.nrels) ?
RelInfo *new_rel = &new_db->rel_arr.rels[new_relnum]; &old_db->rel_arr.rels[old_relnum] : NULL;
RelInfo *new_rel = (new_relnum < new_db->rel_arr.nrels) ?
&new_db->rel_arr.rels[new_relnum] : NULL;
/* /* handle running off one array before the other */
* It is possible that the new cluster has a TOAST table for a table if (!new_rel)
* that didn't need one in the old cluster, e.g. 9.0 to 9.1 changed
* the NUMERIC length computation. Therefore, if we have a TOAST
* table in the new cluster that doesn't match, skip over it and
* continue processing. It is possible this TOAST table used an OID
* that was reserved in the old cluster, but we have no way of testing
* that, and we would have already gotten an error at the new cluster
* schema creation stage. Fortunately, since we only restore the OID
* counter after schema restore, and restore in OID order via pg_dump,
* a conflict would only happen if the new TOAST table had a very low
* OID. However, TOAST tables created long after initial table
* creation can have any OID, particularly after OID wraparound.
*/
if (old_relnum == old_db->rel_arr.nrels)
{ {
if (strcmp(new_rel->nspname, "pg_toast") == 0) /*
continue; * old_rel is unmatched. This should never happen, because we
else * force new rels to have TOAST tables if the old one did.
pg_fatal("Extra non-TOAST relation found in database \"%s\": new OID %d\n", */
old_db->db_name, new_rel->reloid); report_unmatched_relation(old_rel, old_db, false);
all_matched = false;
old_relnum++;
continue;
}
if (!old_rel)
{
/*
* new_rel is unmatched. This shouldn't really happen either, but
* if it's a TOAST table, we can ignore it and continue
* processing, assuming that the new server made a TOAST table
* that wasn't needed.
*/
if (strcmp(new_rel->nspname, "pg_toast") != 0)
{
report_unmatched_relation(new_rel, new_db, true);
all_matched = false;
}
new_relnum++;
continue;
} }
old_rel = &old_db->rel_arr.rels[old_relnum]; /* check for mismatched OID */
if (old_rel->reloid < new_rel->reloid)
if (old_rel->reloid != new_rel->reloid)
{ {
if (strcmp(new_rel->nspname, "pg_toast") == 0) /* old_rel is unmatched, see comment above */
continue; report_unmatched_relation(old_rel, old_db, false);
else all_matched = false;
pg_fatal("Mismatch of relation OID in database \"%s\": old OID %d, new OID %d\n", old_relnum++;
old_db->db_name, old_rel->reloid, new_rel->reloid); continue;
}
else if (old_rel->reloid > new_rel->reloid)
{
/* new_rel is unmatched, see comment above */
if (strcmp(new_rel->nspname, "pg_toast") != 0)
{
report_unmatched_relation(new_rel, new_db, true);
all_matched = false;
}
new_relnum++;
continue;
} }
/* /*
* TOAST table names initially match the heap pg_class oid. In * Verify that rels of same OID have same name. The namespace name
* pre-8.4, TOAST table names change during CLUSTER; in pre-9.0, TOAST * should always match, but the relname might not match for TOAST
* table names change during ALTER TABLE ALTER COLUMN SET TYPE. In >= * tables (and, therefore, their indexes).
* 9.0, TOAST relation names always use heap table oids, hence we *
* cannot check relation names when upgrading from pre-9.0. Clusters * TOAST table names initially match the heap pg_class oid, but
* upgraded to 9.0 will get matching TOAST names. If index names don't * pre-9.0 they can change during certain commands such as CLUSTER, so
* match primary key constraint names, this will fail because pg_dump * don't insist on a match if old cluster is < 9.0.
* dumps constraint names and pg_upgrade checks index names.
*/ */
if (strcmp(old_rel->nspname, new_rel->nspname) != 0 || if (strcmp(old_rel->nspname, new_rel->nspname) != 0 ||
((GET_MAJOR_VERSION(old_cluster.major_version) >= 900 || (strcmp(old_rel->relname, new_rel->relname) != 0 &&
strcmp(old_rel->nspname, "pg_toast") != 0) && (GET_MAJOR_VERSION(old_cluster.major_version) >= 900 ||
strcmp(old_rel->relname, new_rel->relname) != 0)) strcmp(old_rel->nspname, "pg_toast") != 0)))
pg_fatal("Mismatch of relation names in database \"%s\": " {
"old name \"%s.%s\", new name \"%s.%s\"\n", pg_log(PG_WARNING, "Relation names for OID %u in database \"%s\" do not match: "
old_db->db_name, old_rel->nspname, old_rel->relname, "old name \"%s.%s\", new name \"%s.%s\"\n",
new_rel->nspname, new_rel->relname); old_rel->reloid, old_db->db_name,
old_rel->nspname, old_rel->relname,
new_rel->nspname, new_rel->relname);
all_matched = false;
old_relnum++;
new_relnum++;
continue;
}
/* OK, create a mapping entry */
create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db, create_rel_filename_map(old_pgdata, new_pgdata, old_db, new_db,
old_rel, new_rel, maps + num_maps); old_rel, new_rel, maps + num_maps);
num_maps++; num_maps++;
old_relnum++; old_relnum++;
new_relnum++;
} }
/* Did we fail to exhaust the old array? */ if (!all_matched)
if (old_relnum != old_db->rel_arr.nrels) pg_fatal("Failed to match up old and new tables in database \"%s\"\n",
pg_fatal("old and new databases \"%s\" have a mismatched number of relations\n",
old_db->db_name); old_db->db_name);
*nmaps = num_maps; *nmaps = num_maps;
@ -187,6 +215,71 @@ create_rel_filename_map(const char *old_data, const char *new_data,
} }
/*
* Complain about a relation we couldn't match to the other database,
* identifying it as best we can.
*/
static void
report_unmatched_relation(const RelInfo *rel, const DbInfo *db, bool is_new_db)
{
Oid reloid = rel->reloid; /* we might change rel below */
char reldesc[1000];
int i;
snprintf(reldesc, sizeof(reldesc), "\"%s.%s\"",
rel->nspname, rel->relname);
if (rel->indtable)
{
for (i = 0; i < db->rel_arr.nrels; i++)
{
const RelInfo *hrel = &db->rel_arr.rels[i];
if (hrel->reloid == rel->indtable)
{
snprintf(reldesc + strlen(reldesc),
sizeof(reldesc) - strlen(reldesc),
" which is an index on \"%s.%s\"",
hrel->nspname, hrel->relname);
/* Shift attention to index's table for toast check */
rel = hrel;
break;
}
}
if (i >= db->rel_arr.nrels)
snprintf(reldesc + strlen(reldesc),
sizeof(reldesc) - strlen(reldesc),
" which is an index on OID %u", rel->indtable);
}
if (rel->toastheap)
{
for (i = 0; i < db->rel_arr.nrels; i++)
{
const RelInfo *brel = &db->rel_arr.rels[i];
if (brel->reloid == rel->toastheap)
{
snprintf(reldesc + strlen(reldesc),
sizeof(reldesc) - strlen(reldesc),
" which is the TOAST table for \"%s.%s\"",
brel->nspname, brel->relname);
break;
}
}
if (i >= db->rel_arr.nrels)
snprintf(reldesc + strlen(reldesc),
sizeof(reldesc) - strlen(reldesc),
" which is the TOAST table for OID %u", rel->toastheap);
}
if (is_new_db)
pg_log(PG_WARNING, "No match found in old cluster for new relation with OID %u in database \"%s\": %s\n",
reloid, db->db_name, reldesc);
else
pg_log(PG_WARNING, "No match found in new cluster for old relation with OID %u in database \"%s\": %s\n",
reloid, db->db_name, reldesc);
}
void void
print_maps(FileNameMap *maps, int n_maps, const char *db_name) print_maps(FileNameMap *maps, int n_maps, const char *db_name)
{ {
@ -301,11 +394,11 @@ get_db_infos(ClusterInfo *cluster)
/* /*
* get_rel_infos() * get_rel_infos()
* *
* gets the relinfos for all the user tables of the database referred * gets the relinfos for all the user tables and indexes of the database
* by "db". * referred to by "dbinfo".
* *
* NOTE: we assume that relations/entities with oids greater than * Note: the resulting RelInfo array is assumed to be sorted by OID.
* FirstNormalObjectId belongs to the user * This allows later processing to match up old and new databases efficiently.
*/ */
static void static void
get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
@ -323,93 +416,98 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
int i_spclocation, int i_spclocation,
i_nspname, i_nspname,
i_relname, i_relname,
i_oid, i_reloid,
i_indtable,
i_toastheap,
i_relfilenode, i_relfilenode,
i_reltablespace; i_reltablespace;
char query[QUERY_ALLOC]; char query[QUERY_ALLOC];
char *last_namespace = NULL, char *last_namespace = NULL,
*last_tablespace = NULL; *last_tablespace = NULL;
query[0] = '\0'; /* initialize query string to empty */
/* /*
* Create a CTE that collects OIDs of regular user tables, including
* matviews and sequences, but excluding toast tables and indexes. We
* assume that relations with OIDs >= FirstNormalObjectId belong to the
* user. (That's probably redundant with the namespace-name exclusions,
* but let's be safe.)
*
* pg_largeobject contains user data that does not appear in pg_dump * pg_largeobject contains user data that does not appear in pg_dump
* --schema-only output, so we have to copy that system table heap and * output, so we have to copy that system table. It's easiest to do that
* index. We could grab the pg_largeobject oids from template1, but it is * by treating it as a user table. Likewise for pg_largeobject_metadata,
* easy to treat it as a normal table. Order by oid so we can join old/new * if it exists.
* structures efficiently.
*/ */
snprintf(query + strlen(query), sizeof(query) - strlen(query),
snprintf(query, sizeof(query), "WITH regular_heap (reloid, indtable, toastheap) AS ( "
/* get regular heap */ " SELECT c.oid, 0::oid, 0::oid "
"WITH regular_heap (reloid) AS ( " " FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n "
" SELECT c.oid " " ON c.relnamespace = n.oid "
" FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n " " WHERE relkind IN ('r', 'm', 'S') AND "
" ON c.relnamespace = n.oid "
" LEFT OUTER JOIN pg_catalog.pg_index i "
" ON c.oid = i.indexrelid "
" WHERE relkind IN ('r', 'm', 'i', 'S') AND "
/*
* pg_dump only dumps valid indexes; testing indisready is necessary in
* 9.2, and harmless in earlier/later versions.
*/
" i.indisvalid IS DISTINCT FROM false AND "
" i.indisready IS DISTINCT FROM false AND "
/* exclude possible orphaned temp tables */ /* exclude possible orphaned temp tables */
" ((n.nspname !~ '^pg_temp_' AND " " ((n.nspname !~ '^pg_temp_' AND "
" n.nspname !~ '^pg_toast_temp_' AND " " n.nspname !~ '^pg_toast_temp_' AND "
/* skip pg_toast because toast index have relkind == 'i', not 't' */ " n.nspname NOT IN ('pg_catalog', 'information_schema', "
" n.nspname NOT IN ('pg_catalog', 'information_schema', " " 'binary_upgrade', 'pg_toast') AND "
" 'binary_upgrade', 'pg_toast') AND " " c.oid >= %u::pg_catalog.oid) OR "
" c.oid >= %u) OR " " (n.nspname = 'pg_catalog' AND "
" (n.nspname = 'pg_catalog' AND " " relname IN ('pg_largeobject'%s) ))), ",
" relname IN ('pg_largeobject', 'pg_largeobject_loid_pn_index'%s) ))), " FirstNormalObjectId,
(GET_MAJOR_VERSION(old_cluster.major_version) >= 900) ?
", 'pg_largeobject_metadata'" : "");
/* /*
* We have to gather the TOAST tables in later steps because we can't * Add a CTE that collects OIDs of toast tables belonging to the tables
* schema-qualify TOAST tables. * selected by the regular_heap CTE. (We have to do this separately
* because the namespace-name rules above don't work for toast tables.)
*/ */
/* get TOAST heap */ snprintf(query + strlen(query), sizeof(query) - strlen(query),
" toast_heap (reloid) AS ( " " toast_heap (reloid, indtable, toastheap) AS ( "
" SELECT reltoastrelid " " SELECT c.reltoastrelid, 0::oid, c.oid "
" FROM regular_heap JOIN pg_catalog.pg_class c " " FROM regular_heap JOIN pg_catalog.pg_class c "
" ON regular_heap.reloid = c.oid " " ON regular_heap.reloid = c.oid "
" AND c.reltoastrelid != %u), " " WHERE c.reltoastrelid != 0), ");
/* get indexes on regular and TOAST heap */
" all_index (reloid) AS ( " /*
" SELECT indexrelid " * Add a CTE that collects OIDs of all valid indexes on the previously
" FROM pg_index " * selected tables. We can ignore invalid indexes since pg_dump does.
" WHERE indisvalid " * Testing indisready is necessary in 9.2, and harmless in earlier/later
" AND indrelid IN (SELECT reltoastrelid " * versions.
" FROM (SELECT reloid FROM regular_heap " */
" UNION ALL " snprintf(query + strlen(query), sizeof(query) - strlen(query),
" SELECT reloid FROM toast_heap) all_heap " " all_index (reloid, indtable, toastheap) AS ( "
" JOIN pg_catalog.pg_class c " " SELECT indexrelid, indrelid, 0::oid "
" ON all_heap.reloid = c.oid " " FROM pg_catalog.pg_index "
" AND c.reltoastrelid != %u)) " " WHERE indisvalid AND indisready "
/* get all rels */ " AND indrelid IN "
"SELECT c.oid, n.nspname, c.relname, " " (SELECT reloid FROM regular_heap "
" c.relfilenode, c.reltablespace, %s " " UNION ALL "
"FROM (SELECT reloid FROM regular_heap " " SELECT reloid FROM toast_heap)) ");
" UNION ALL "
" SELECT reloid FROM toast_heap " /*
" UNION ALL " * And now we can write the query that retrieves the data we want for each
" SELECT reloid FROM all_index) all_rels " * heap and index relation. Make sure result is sorted by OID.
*/
snprintf(query + strlen(query), sizeof(query) - strlen(query),
"SELECT all_rels.*, n.nspname, c.relname, "
" c.relfilenode, c.reltablespace, %s "
"FROM (SELECT * FROM regular_heap "
" UNION ALL "
" SELECT * FROM toast_heap "
" UNION ALL "
" SELECT * FROM all_index) all_rels "
" JOIN pg_catalog.pg_class c " " JOIN pg_catalog.pg_class c "
" ON all_rels.reloid = c.oid " " ON all_rels.reloid = c.oid "
" JOIN pg_catalog.pg_namespace n " " JOIN pg_catalog.pg_namespace n "
" ON c.relnamespace = n.oid " " ON c.relnamespace = n.oid "
" LEFT OUTER JOIN pg_catalog.pg_tablespace t " " LEFT OUTER JOIN pg_catalog.pg_tablespace t "
" ON c.reltablespace = t.oid " " ON c.reltablespace = t.oid "
/* we preserve pg_class.oid so we sort by it to match old/new */
"ORDER BY 1;", "ORDER BY 1;",
FirstNormalObjectId, /* 9.2 removed the pg_tablespace.spclocation column */
/* does pg_largeobject_metadata need to be migrated? */ (GET_MAJOR_VERSION(cluster->major_version) >= 902) ?
(GET_MAJOR_VERSION(old_cluster.major_version) <= 804) ? "pg_catalog.pg_tablespace_location(t.oid) AS spclocation" :
"" : ", 'pg_largeobject_metadata', 'pg_largeobject_metadata_oid_index'", "t.spclocation");
InvalidOid, InvalidOid,
/* 9.2 removed the spclocation column */
(GET_MAJOR_VERSION(cluster->major_version) <= 901) ?
"t.spclocation" : "pg_catalog.pg_tablespace_location(t.oid) AS spclocation");
res = executeQueryOrDie(conn, "%s", query); res = executeQueryOrDie(conn, "%s", query);
@ -417,7 +515,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
relinfos = (RelInfo *) pg_malloc(sizeof(RelInfo) * ntups); relinfos = (RelInfo *) pg_malloc(sizeof(RelInfo) * ntups);
i_oid = PQfnumber(res, "oid"); i_reloid = PQfnumber(res, "reloid");
i_indtable = PQfnumber(res, "indtable");
i_toastheap = PQfnumber(res, "toastheap");
i_nspname = PQfnumber(res, "nspname"); i_nspname = PQfnumber(res, "nspname");
i_relname = PQfnumber(res, "relname"); i_relname = PQfnumber(res, "relname");
i_relfilenode = PQfnumber(res, "relfilenode"); i_relfilenode = PQfnumber(res, "relfilenode");
@ -428,7 +528,9 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
{ {
RelInfo *curr = &relinfos[num_rels++]; RelInfo *curr = &relinfos[num_rels++];
curr->reloid = atooid(PQgetvalue(res, relnum, i_oid)); curr->reloid = atooid(PQgetvalue(res, relnum, i_reloid));
curr->indtable = atooid(PQgetvalue(res, relnum, i_indtable));
curr->toastheap = atooid(PQgetvalue(res, relnum, i_toastheap));
nspname = PQgetvalue(res, relnum, i_nspname); nspname = PQgetvalue(res, relnum, i_nspname);
curr->nsp_alloc = false; curr->nsp_alloc = false;
@ -453,7 +555,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo)
curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode)); curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode));
curr->tblsp_alloc = false; curr->tblsp_alloc = false;
/* Is the tablespace oid non-zero? */ /* Is the tablespace oid non-default? */
if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0) if (atooid(PQgetvalue(res, relnum, i_reltablespace)) != 0)
{ {
/* /*

View File

@ -137,15 +137,16 @@ extern char *output_files[];
*/ */
typedef struct typedef struct
{ {
/* Can't use NAMEDATALEN; not guaranteed to fit on client */ /* Can't use NAMEDATALEN; not guaranteed to be same on client */
char *nspname; /* namespace name */ char *nspname; /* namespace name */
char *relname; /* relation name */ char *relname; /* relation name */
Oid reloid; /* relation oid */ Oid reloid; /* relation OID */
Oid relfilenode; /* relation relfile node */ Oid relfilenode; /* relation relfile node */
/* relation tablespace path, or "" for the cluster default */ Oid indtable; /* if index, OID of its table, else 0 */
char *tablespace; Oid toastheap; /* if toast table, OID of base table, else 0 */
bool nsp_alloc; char *tablespace; /* tablespace path; "" for cluster default */
bool tblsp_alloc; bool nsp_alloc; /* should nspname be freed? */
bool tblsp_alloc; /* should tablespace be freed? */
} RelInfo; } RelInfo;
typedef struct typedef struct