Improve parallel restore's ability to cope with selective restore (-L option).

The original coding tended to break down in the face of modified restore
orders, as shown in bug #5626 from Albert Ullrich, because it would flip over
into parallel-restore operation too soon.  That causes problems because we
don't have sufficient dependency information in dump archives to allow safe
parallel processing of SECTION_PRE_DATA items.  Even if we did, it's probably
undesirable to allow that to override the commanded restore order.

To fix the problem of omitted items causing unexpected changes in restore
order, tweak SortTocFromFile so that omitted items end up at the head of
the list not the tail.  This ensures that they'll be examined and their
dependencies will be marked satisfied before we get to any interesting
items.

In HEAD and 9.0, we can easily change restore_toc_entries_parallel so that
all SECTION_PRE_DATA items are guaranteed to be processed in the initial
serial-restore loop, and hence in commanded order.  Only DATA and POST_DATA
items are candidates for parallel processing.  For them there might be
variations from the commanded order because of parallelism, but we should
do it in a safe order thanks to dependencies.

In 8.4 it's much harder to make such a guarantee.  I settled for not
letting the initial loop break out into parallel processing mode if
it sees a DATA/POST_DATA item that's not to be restored; this at least
prevents a non-restorable item from causing premature exit from the loop.
This means that 8.4 will be more likely to fail given a badly-ordered -L
list than 9.x, but we don't really promise any such thing will work anyway.
This commit is contained in:
Tom Lane 2010-08-21 13:59:44 +00:00
parent 5abd2d704d
commit c5d6d5bc6d
1 changed files with 38 additions and 19 deletions

View File

@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.187 2010/07/06 19:18:59 momjian Exp $
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.188 2010/08/21 13:59:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -102,7 +102,7 @@ static bool _tocEntryIsACL(TocEntry *te);
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt);
static TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id);
static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te);
static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te);
static int _discoverArchiveFormat(ArchiveHandle *AH);
static void dump_lo_buf(ArchiveHandle *AH);
@ -995,15 +995,11 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
char *endptr;
DumpId id;
TocEntry *te;
TocEntry *tePrev;
/* Allocate space for the 'wanted' array, and init it */
ropt->idWanted = (bool *) malloc(sizeof(bool) * AH->maxDumpId);
memset(ropt->idWanted, 0, sizeof(bool) * AH->maxDumpId);
/* Set prev entry as head of list */
tePrev = AH->toc;
/* Setup the file */
fh = fopen(ropt->tocFile, PG_BINARY_R);
if (!fh)
@ -1018,7 +1014,7 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
cmnt[0] = '\0';
/* Ignore if all blank */
if (strspn(buf, " \t\r") == strlen(buf))
if (strspn(buf, " \t\r\n") == strlen(buf))
continue;
/* Get an ID, check it's valid and not already seen */
@ -1036,10 +1032,21 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt)
die_horribly(AH, modulename, "could not find entry for ID %d\n",
id);
/* Mark it wanted */
ropt->idWanted[id - 1] = true;
_moveAfter(AH, tePrev, te);
tePrev = te;
/*
* Move each item to the end of the list as it is selected, so that
* they are placed in the desired order. Any unwanted items will end
* up at the front of the list, which may seem unintuitive but it's
* what we need. In an ordinary serial restore that makes no
* difference, but in a parallel restore we need to mark unrestored
* items' dependencies as satisfied before we start examining
* restorable items. Otherwise they could have surprising
* side-effects on the order in which restorable items actually get
* restored.
*/
_moveBefore(AH, AH->toc, te);
}
if (fclose(fh) != 0)
@ -1471,33 +1478,37 @@ warn_or_die_horribly(ArchiveHandle *AH,
va_end(ap);
}
#ifdef NOT_USED
static void
_moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
{
/* Unlink te from list */
te->prev->next = te->next;
te->next->prev = te->prev;
/* and insert it after "pos" */
te->prev = pos;
te->next = pos->next;
pos->next->prev = te;
pos->next = te;
}
#ifdef NOT_USED
#endif
static void
_moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te)
{
/* Unlink te from list */
te->prev->next = te->next;
te->next->prev = te->prev;
/* and insert it before "pos" */
te->prev = pos->prev;
te->next = pos;
pos->prev->next = te;
pos->prev = te;
}
#endif
static TocEntry *
getTocEntryByDumpId(ArchiveHandle *AH, DumpId id)
@ -3180,13 +3191,16 @@ restore_toc_entries_parallel(ArchiveHandle *AH)
* Do all the early stuff in a single connection in the parent. There's no
* great point in running it in parallel, in fact it will actually run
* faster in a single connection because we avoid all the connection and
* setup overhead.
* setup overhead. Also, pg_dump is not currently very good about
* showing all the dependencies of SECTION_PRE_DATA items, so we do not
* risk trying to process them out-of-order.
*/
for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
{
/* Non-PRE_DATA items are just ignored for now */
if (next_work_item->section == SECTION_DATA ||
next_work_item->section == SECTION_POST_DATA)
break;
continue;
ahlog(AH, 1, "processing item %d %s %s\n",
next_work_item->dumpId,
@ -3229,12 +3243,17 @@ restore_toc_entries_parallel(ArchiveHandle *AH)
*/
par_list_header_init(&pending_list);
par_list_header_init(&ready_list);
for (; next_work_item != AH->toc; next_work_item = next_work_item->next)
for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next)
{
if (next_work_item->depCount > 0)
par_list_append(&pending_list, next_work_item);
else
par_list_append(&ready_list, next_work_item);
/* All PRE_DATA items were dealt with above */
if (next_work_item->section == SECTION_DATA ||
next_work_item->section == SECTION_POST_DATA)
{
if (next_work_item->depCount > 0)
par_list_append(&pending_list, next_work_item);
else
par_list_append(&ready_list, next_work_item);
}
}
/*