From c5d6d5bc6d23b6ebd6cb17d401e10581cd112c20 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 21 Aug 2010 13:59:44 +0000 Subject: [PATCH] Improve parallel restore's ability to cope with selective restore (-L option). The original coding tended to break down in the face of modified restore orders, as shown in bug #5626 from Albert Ullrich, because it would flip over into parallel-restore operation too soon. That causes problems because we don't have sufficient dependency information in dump archives to allow safe parallel processing of SECTION_PRE_DATA items. Even if we did, it's probably undesirable to allow that to override the commanded restore order. To fix the problem of omitted items causing unexpected changes in restore order, tweak SortTocFromFile so that omitted items end up at the head of the list not the tail. This ensures that they'll be examined and their dependencies will be marked satisfied before we get to any interesting items. In HEAD and 9.0, we can easily change restore_toc_entries_parallel so that all SECTION_PRE_DATA items are guaranteed to be processed in the initial serial-restore loop, and hence in commanded order. Only DATA and POST_DATA items are candidates for parallel processing. For them there might be variations from the commanded order because of parallelism, but we should do it in a safe order thanks to dependencies. In 8.4 it's much harder to make such a guarantee. I settled for not letting the initial loop break out into parallel processing mode if it sees a DATA/POST_DATA item that's not to be restored; this at least prevents a non-restorable item from causing premature exit from the loop. This means that 8.4 will be more likely to fail given a badly-ordered -L list than 9.x, but we don't really promise any such thing will work anyway. --- src/bin/pg_dump/pg_backup_archiver.c | 57 ++++++++++++++++++---------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 61e4e41222..7e1374bf89 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.187 2010/07/06 19:18:59 momjian Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.c,v 1.188 2010/08/21 13:59:44 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -102,7 +102,7 @@ static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te, RestoreOptions *ropt); static TocEntry *getTocEntryByDumpId(ArchiveHandle *AH, DumpId id); -static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te); +static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te); static int _discoverArchiveFormat(ArchiveHandle *AH); static void dump_lo_buf(ArchiveHandle *AH); @@ -995,15 +995,11 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) char *endptr; DumpId id; TocEntry *te; - TocEntry *tePrev; /* Allocate space for the 'wanted' array, and init it */ ropt->idWanted = (bool *) malloc(sizeof(bool) * AH->maxDumpId); memset(ropt->idWanted, 0, sizeof(bool) * AH->maxDumpId); - /* Set prev entry as head of list */ - tePrev = AH->toc; - /* Setup the file */ fh = fopen(ropt->tocFile, PG_BINARY_R); if (!fh) @@ -1018,7 +1014,7 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) cmnt[0] = '\0'; /* Ignore if all blank */ - if (strspn(buf, " \t\r") == strlen(buf)) + if (strspn(buf, " \t\r\n") == strlen(buf)) continue; /* Get an ID, check it's valid and not already seen */ @@ -1036,10 +1032,21 @@ SortTocFromFile(Archive *AHX, RestoreOptions *ropt) die_horribly(AH, modulename, "could not find entry for ID %d\n", id); + /* Mark it wanted */ ropt->idWanted[id - 1] = true; - _moveAfter(AH, tePrev, te); - tePrev = te; + /* + * Move each item to the end of the list as it is selected, so that + * they are placed in the desired order. Any unwanted items will end + * up at the front of the list, which may seem unintuitive but it's + * what we need. In an ordinary serial restore that makes no + * difference, but in a parallel restore we need to mark unrestored + * items' dependencies as satisfied before we start examining + * restorable items. Otherwise they could have surprising + * side-effects on the order in which restorable items actually get + * restored. + */ + _moveBefore(AH, AH->toc, te); } if (fclose(fh) != 0) @@ -1471,33 +1478,37 @@ warn_or_die_horribly(ArchiveHandle *AH, va_end(ap); } +#ifdef NOT_USED + static void _moveAfter(ArchiveHandle *AH, TocEntry *pos, TocEntry *te) { + /* Unlink te from list */ te->prev->next = te->next; te->next->prev = te->prev; + /* and insert it after "pos" */ te->prev = pos; te->next = pos->next; - pos->next->prev = te; pos->next = te; } -#ifdef NOT_USED +#endif static void _moveBefore(ArchiveHandle *AH, TocEntry *pos, TocEntry *te) { + /* Unlink te from list */ te->prev->next = te->next; te->next->prev = te->prev; + /* and insert it before "pos" */ te->prev = pos->prev; te->next = pos; pos->prev->next = te; pos->prev = te; } -#endif static TocEntry * getTocEntryByDumpId(ArchiveHandle *AH, DumpId id) @@ -3180,13 +3191,16 @@ restore_toc_entries_parallel(ArchiveHandle *AH) * Do all the early stuff in a single connection in the parent. There's no * great point in running it in parallel, in fact it will actually run * faster in a single connection because we avoid all the connection and - * setup overhead. + * setup overhead. Also, pg_dump is not currently very good about + * showing all the dependencies of SECTION_PRE_DATA items, so we do not + * risk trying to process them out-of-order. */ for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next) { + /* Non-PRE_DATA items are just ignored for now */ if (next_work_item->section == SECTION_DATA || next_work_item->section == SECTION_POST_DATA) - break; + continue; ahlog(AH, 1, "processing item %d %s %s\n", next_work_item->dumpId, @@ -3229,12 +3243,17 @@ restore_toc_entries_parallel(ArchiveHandle *AH) */ par_list_header_init(&pending_list); par_list_header_init(&ready_list); - for (; next_work_item != AH->toc; next_work_item = next_work_item->next) + for (next_work_item = AH->toc->next; next_work_item != AH->toc; next_work_item = next_work_item->next) { - if (next_work_item->depCount > 0) - par_list_append(&pending_list, next_work_item); - else - par_list_append(&ready_list, next_work_item); + /* All PRE_DATA items were dealt with above */ + if (next_work_item->section == SECTION_DATA || + next_work_item->section == SECTION_POST_DATA) + { + if (next_work_item->depCount > 0) + par_list_append(&pending_list, next_work_item); + else + par_list_append(&ready_list, next_work_item); + } } /*