Improve pg_dump's dependency-sorting logic to enforce section dump order.

As of 9.2, with the --section option, it is very important that the concept
of "pre data", "data", and "post data" sections of the output be honored
strictly; else a dump divided into separate sectional files might be
unrestorable.  However, the dependency-sorting logic knew nothing of
sections and would happily select output orderings that didn't fit that
structure.  Doing so was mostly harmless before 9.2, but now we need to be
sure it doesn't do that.  To fix, create dummy objects representing the
section boundaries and add dependencies between them and all the normal
objects.  (This might sound expensive but it seems to only add a percent or
two to pg_dump's runtime.)

This also fixes a problem introduced in 9.1 by the feature that allows
incomplete GROUP BY lists when a primary key is given in GROUP BY.
That means that views can depend on primary key constraints.  Previously,
pg_dump would deal with that by simply emitting the primary key constraint
before the view definition (and hence before the data section of the
output).  That's bad enough for simple serial restores, where creating an
index before the data is loaded works, but is undesirable for speed
reasons.  But it could lead to outright failure of parallel restores, as
seen in bug #6699 from Joe Van Dyk.  That happened because pg_restore would
switch into parallel mode as soon as it reached the constraint, and then
very possibly would try to emit the view definition before the primary key
was committed (as a consequence of another bug that causes the view not to
be correctly marked as depending on the constraint).  Adding the section
boundary constraints forces the dependency-sorting code to break the view
into separate table and rule declarations, allowing the rule, and hence the
primary key constraint it depends on, to revert to their intended location
in the post-data section.  This also somewhat accidentally works around the
bogus-dependency-marking problem, because the rule will be correctly shown
as depending on the constraint, so parallel pg_restore will now do the
right thing.  (We will fix the bogus-dependency problem for real in a
separate patch, but that patch is not easily back-portable to 9.1, so the
fact that this patch is enough to dodge the only known symptom is
fortunate.)

Back-patch to 9.1, except for the hunk that adds verification that the
finished archive TOC list is in correct section order; the place where
it was convenient to add that doesn't exist in 9.1.
This commit is contained in:
Tom Lane 2012-06-25 21:19:10 -04:00
parent 77ed0c6950
commit a1ef01fe16
4 changed files with 238 additions and 30 deletions

View File

@ -247,8 +247,42 @@ SetArchiveRestoreOptions(Archive *AHX, RestoreOptions *ropt)
curSection = SECTION_PRE_DATA;
for (te = AH->toc->next; te != AH->toc; te = te->next)
{
/*
* When writing an archive, we also take this opportunity to check
* that we have generated the entries in a sane order that respects
* the section divisions. When reading, don't complain, since buggy
* old versions of pg_dump might generate out-of-order archives.
*/
if (AH->mode != archModeRead)
{
switch (te->section)
{
case SECTION_NONE:
/* ok to be anywhere */
break;
case SECTION_PRE_DATA:
if (curSection != SECTION_PRE_DATA)
write_msg(modulename,
"WARNING: archive items not in correct section order\n");
break;
case SECTION_DATA:
if (curSection == SECTION_POST_DATA)
write_msg(modulename,
"WARNING: archive items not in correct section order\n");
break;
case SECTION_POST_DATA:
/* ok no matter which section we were in */
break;
default:
exit_horribly(modulename, "unexpected section code %d\n",
(int) te->section);
break;
}
}
if (te->section != SECTION_NONE)
curSection = te->section;
te->reqs = _tocEntryRequired(te, curSection, ropt);
}
}

View File

@ -210,6 +210,11 @@ static void dumpACL(Archive *fout, CatalogId objCatId, DumpId objDumpId,
const char *acls);
static void getDependencies(Archive *fout);
static DumpableObject *createBoundaryObjects(void);
static void addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
DumpableObject *boundaryObjs);
static void getDomainConstraints(Archive *fout, TypeInfo *tyinfo);
static void getTableData(TableInfo *tblinfo, int numTables, bool oids);
static void makeTableDataInfo(TableInfo *tbinfo, bool oids);
@ -270,6 +275,7 @@ main(int argc, char **argv)
int numTables;
DumpableObject **dobjs;
int numObjs;
DumpableObject *boundaryObjs;
int i;
enum trivalue prompt_password = TRI_DEFAULT;
int compressLevel = -1;
@ -691,6 +697,17 @@ main(int argc, char **argv)
*/
getDependencies(fout);
/* Lastly, create dummy objects to represent the section boundaries */
boundaryObjs = createBoundaryObjects();
/* Get pointers to all the known DumpableObjects */
getDumpableObjects(&dobjs, &numObjs);
/*
* Add dummy dependencies to enforce the dump section ordering.
*/
addBoundaryDependencies(dobjs, numObjs, boundaryObjs);
/*
* Sort the objects into a safe dump order (no forward references).
*
@ -700,14 +717,13 @@ main(int argc, char **argv)
* will dump identically. Before 7.3 we don't have dependencies and we
* use OID ordering as an (unreliable) guide to creation order.
*/
getDumpableObjects(&dobjs, &numObjs);
if (fout->remoteVersion >= 70300)
sortDumpableObjectsByTypeName(dobjs, numObjs);
else
sortDumpableObjectsByTypeOid(dobjs, numObjs);
sortDumpableObjects(dobjs, numObjs);
sortDumpableObjects(dobjs, numObjs,
boundaryObjs[0].dumpId, boundaryObjs[1].dumpId);
/*
* Create archive TOC entries for all the objects to be dumped, in a safe
@ -7184,6 +7200,10 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
dobj->dependencies, dobj->nDeps,
dumpBlobs, NULL);
break;
case DO_PRE_DATA_BOUNDARY:
case DO_POST_DATA_BOUNDARY:
/* never dumped, nothing to do */
break;
}
}
@ -11672,7 +11692,7 @@ dumpDefaultACL(Archive *fout, DefaultACLInfo *daclinfo)
daclinfo->dobj.namespace ? daclinfo->dobj.namespace->dobj.name : NULL,
NULL,
daclinfo->defaclrole,
false, "DEFAULT ACL", SECTION_NONE,
false, "DEFAULT ACL", SECTION_POST_DATA,
q->data, "", NULL,
daclinfo->dobj.dependencies, daclinfo->dobj.nDeps,
NULL, NULL);
@ -14027,6 +14047,113 @@ getDependencies(Archive *fout)
}
/*
* createBoundaryObjects - create dummy DumpableObjects to represent
* dump section boundaries.
*/
static DumpableObject *
createBoundaryObjects(void)
{
DumpableObject *dobjs;
dobjs = (DumpableObject *) pg_malloc(2 * sizeof(DumpableObject));
dobjs[0].objType = DO_PRE_DATA_BOUNDARY;
dobjs[0].catId = nilCatalogId;
AssignDumpId(dobjs + 0);
dobjs[0].name = pg_strdup("PRE-DATA BOUNDARY");
dobjs[1].objType = DO_POST_DATA_BOUNDARY;
dobjs[1].catId = nilCatalogId;
AssignDumpId(dobjs + 1);
dobjs[1].name = pg_strdup("POST-DATA BOUNDARY");
return dobjs;
}
/*
* addBoundaryDependencies - add dependencies as needed to enforce the dump
* section boundaries.
*/
static void
addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
DumpableObject *boundaryObjs)
{
DumpableObject *preDataBound = boundaryObjs + 0;
DumpableObject *postDataBound = boundaryObjs + 1;
int i;
for (i = 0; i < numObjs; i++)
{
DumpableObject *dobj = dobjs[i];
/*
* The classification of object types here must match the SECTION_xxx
* values assigned during subsequent ArchiveEntry calls!
*/
switch (dobj->objType)
{
case DO_NAMESPACE:
case DO_EXTENSION:
case DO_TYPE:
case DO_SHELL_TYPE:
case DO_FUNC:
case DO_AGG:
case DO_OPERATOR:
case DO_OPCLASS:
case DO_OPFAMILY:
case DO_COLLATION:
case DO_CONVERSION:
case DO_TABLE:
case DO_ATTRDEF:
case DO_PROCLANG:
case DO_CAST:
case DO_DUMMY_TYPE:
case DO_TSPARSER:
case DO_TSDICT:
case DO_TSTEMPLATE:
case DO_TSCONFIG:
case DO_FDW:
case DO_FOREIGN_SERVER:
case DO_BLOB:
/* Pre-data objects: must come before the pre-data boundary */
addObjectDependency(preDataBound, dobj->dumpId);
break;
case DO_TABLE_DATA:
case DO_BLOB_DATA:
/* Data objects: must come between the boundaries */
addObjectDependency(dobj, preDataBound->dumpId);
addObjectDependency(postDataBound, dobj->dumpId);
break;
case DO_INDEX:
case DO_TRIGGER:
case DO_DEFAULT_ACL:
/* Post-data objects: must come after the post-data boundary */
addObjectDependency(dobj, postDataBound->dumpId);
break;
case DO_RULE:
/* Rules are post-data, but only if dumped separately */
if (((RuleInfo *) dobj)->separate)
addObjectDependency(dobj, postDataBound->dumpId);
break;
case DO_CONSTRAINT:
case DO_FK_CONSTRAINT:
/* Constraints are post-data, but only if dumped separately */
if (((ConstraintInfo *) dobj)->separate)
addObjectDependency(dobj, postDataBound->dumpId);
break;
case DO_PRE_DATA_BOUNDARY:
/* nothing to do */
break;
case DO_POST_DATA_BOUNDARY:
/* must come after the pre-data boundary */
addObjectDependency(dobj, preDataBound->dumpId);
break;
}
}
}
/*
* selectSourceSchema - make the specified schema the active search path
* in the source database.

View File

@ -97,6 +97,7 @@ typedef enum
DO_OPERATOR,
DO_OPCLASS,
DO_OPFAMILY,
DO_COLLATION,
DO_CONVERSION,
DO_TABLE,
DO_ATTRDEF,
@ -118,7 +119,8 @@ typedef enum
DO_DEFAULT_ACL,
DO_BLOB,
DO_BLOB_DATA,
DO_COLLATION
DO_PRE_DATA_BOUNDARY,
DO_POST_DATA_BOUNDARY
} DumpableObjectType;
typedef struct _dumpableObject
@ -520,7 +522,8 @@ extern bool simple_string_list_member(SimpleStringList *list, const char *val);
extern void parseOidArray(const char *str, Oid *array, int arraysize);
extern void sortDumpableObjects(DumpableObject **objs, int numObjs);
extern void sortDumpableObjects(DumpableObject **objs, int numObjs,
DumpId preBoundaryId, DumpId postBoundaryId);
extern void sortDumpableObjectsByTypeName(DumpableObject **objs, int numObjs);
extern void sortDumpableObjectsByTypeOid(DumpableObject **objs, int numObjs);

View File

@ -26,6 +26,11 @@ static const char *modulename = gettext_noop("sorter");
* behavior for old databases without full dependency info.) Note: collations,
* extensions, text search, foreign-data, and default ACL objects can't really
* happen here, so the rather bogus priorities for them don't matter.
*
* NOTE: object-type priorities must match the section assignments made in
* pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY,
* POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
* must sort between them.
*/
static const int oldObjectTypePriority[] =
{
@ -38,33 +43,40 @@ static const int oldObjectTypePriority[] =
3, /* DO_OPERATOR */
4, /* DO_OPCLASS */
4, /* DO_OPFAMILY */
4, /* DO_COLLATION */
5, /* DO_CONVERSION */
6, /* DO_TABLE */
8, /* DO_ATTRDEF */
13, /* DO_INDEX */
14, /* DO_RULE */
15, /* DO_TRIGGER */
12, /* DO_CONSTRAINT */
16, /* DO_FK_CONSTRAINT */
15, /* DO_INDEX */
16, /* DO_RULE */
17, /* DO_TRIGGER */
14, /* DO_CONSTRAINT */
18, /* DO_FK_CONSTRAINT */
2, /* DO_PROCLANG */
2, /* DO_CAST */
10, /* DO_TABLE_DATA */
11, /* DO_TABLE_DATA */
7, /* DO_DUMMY_TYPE */
3, /* DO_TSPARSER */
4, /* DO_TSPARSER */
4, /* DO_TSDICT */
3, /* DO_TSTEMPLATE */
5, /* DO_TSCONFIG */
3, /* DO_FDW */
4, /* DO_TSTEMPLATE */
4, /* DO_TSCONFIG */
4, /* DO_FDW */
4, /* DO_FOREIGN_SERVER */
17, /* DO_DEFAULT_ACL */
19, /* DO_DEFAULT_ACL */
9, /* DO_BLOB */
11, /* DO_BLOB_DATA */
2 /* DO_COLLATION */
12, /* DO_BLOB_DATA */
10, /* DO_PRE_DATA_BOUNDARY */
13 /* DO_POST_DATA_BOUNDARY */
};
/*
* Sort priority for object types when dumping newer databases.
* Objects are sorted by type, and within a type by name.
*
* NOTE: object-type priorities must match the section assignments made in
* pg_dump.c; that is, PRE_DATA objects must sort before DO_PRE_DATA_BOUNDARY,
* POST_DATA objects must sort after DO_POST_DATA_BOUNDARY, and DATA objects
* must sort between them.
*/
static const int newObjectTypePriority[] =
{
@ -77,17 +89,18 @@ static const int newObjectTypePriority[] =
8, /* DO_OPERATOR */
9, /* DO_OPCLASS */
9, /* DO_OPFAMILY */
3, /* DO_COLLATION */
11, /* DO_CONVERSION */
18, /* DO_TABLE */
20, /* DO_ATTRDEF */
25, /* DO_INDEX */
26, /* DO_RULE */
27, /* DO_TRIGGER */
24, /* DO_CONSTRAINT */
28, /* DO_FK_CONSTRAINT */
27, /* DO_INDEX */
28, /* DO_RULE */
29, /* DO_TRIGGER */
26, /* DO_CONSTRAINT */
30, /* DO_FK_CONSTRAINT */
2, /* DO_PROCLANG */
10, /* DO_CAST */
22, /* DO_TABLE_DATA */
23, /* DO_TABLE_DATA */
19, /* DO_DUMMY_TYPE */
12, /* DO_TSPARSER */
14, /* DO_TSDICT */
@ -95,12 +108,16 @@ static const int newObjectTypePriority[] =
15, /* DO_TSCONFIG */
16, /* DO_FDW */
17, /* DO_FOREIGN_SERVER */
29, /* DO_DEFAULT_ACL */
31, /* DO_DEFAULT_ACL */
21, /* DO_BLOB */
23, /* DO_BLOB_DATA */
3 /* DO_COLLATION */
24, /* DO_BLOB_DATA */
22, /* DO_PRE_DATA_BOUNDARY */
25 /* DO_POST_DATA_BOUNDARY */
};
static DumpId preDataBoundId;
static DumpId postDataBoundId;
static int DOTypeNameCompare(const void *p1, const void *p2);
static int DOTypeOidCompare(const void *p1, const void *p2);
@ -237,16 +254,27 @@ DOTypeOidCompare(const void *p1, const void *p2)
/*
* Sort the given objects into a safe dump order using dependency
* information (to the extent we have it available).
*
* The DumpIds of the PRE_DATA_BOUNDARY and POST_DATA_BOUNDARY objects are
* passed in separately, in case we need them during dependency loop repair.
*/
void
sortDumpableObjects(DumpableObject **objs, int numObjs)
sortDumpableObjects(DumpableObject **objs, int numObjs,
DumpId preBoundaryId, DumpId postBoundaryId)
{
DumpableObject **ordering;
int nOrdering;
if (numObjs <= 0)
if (numObjs <= 0) /* can't happen anymore ... */
return;
/*
* Saving the boundary IDs in static variables is a bit grotty, but seems
* better than adding them to parameter lists of subsidiary functions.
*/
preDataBoundId = preBoundaryId;
postDataBoundId = postBoundaryId;
ordering = (DumpableObject **) pg_malloc(numObjs * sizeof(DumpableObject *));
while (!TopoSort(objs, numObjs, ordering, &nOrdering))
findDependencyLoops(ordering, nOrdering, numObjs);
@ -701,6 +729,8 @@ repairViewRuleMultiLoop(DumpableObject *viewobj,
((RuleInfo *) ruleobj)->separate = true;
/* put back rule's dependency on view */
addObjectDependency(ruleobj, viewobj->dumpId);
/* now that rule is separate, it must be post-data */
addObjectDependency(ruleobj, postDataBoundId);
}
/*
@ -736,6 +766,8 @@ repairTableConstraintMultiLoop(DumpableObject *tableobj,
((ConstraintInfo *) constraintobj)->separate = true;
/* put back constraint's dependency on table */
addObjectDependency(constraintobj, tableobj->dumpId);
/* now that constraint is separate, it must be post-data */
addObjectDependency(constraintobj, postDataBoundId);
}
/*
@ -782,6 +814,8 @@ repairDomainConstraintMultiLoop(DumpableObject *domainobj,
((ConstraintInfo *) constraintobj)->separate = true;
/* put back constraint's dependency on domain */
addObjectDependency(constraintobj, domainobj->dumpId);
/* now that constraint is separate, it must be post-data */
addObjectDependency(constraintobj, postDataBoundId);
}
/*
@ -1190,6 +1224,16 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize)
"BLOB DATA (ID %d)",
obj->dumpId);
return;
case DO_PRE_DATA_BOUNDARY:
snprintf(buf, bufsize,
"PRE-DATA BOUNDARY (ID %d)",
obj->dumpId);
return;
case DO_POST_DATA_BOUNDARY:
snprintf(buf, bufsize,
"POST-DATA BOUNDARY (ID %d)",
obj->dumpId);
return;
}
/* shouldn't get here */
snprintf(buf, bufsize,