From 32f4de0adfb2037f1402e40b54a5c4043227363f Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Tue, 4 Dec 2012 15:28:58 +0200 Subject: [PATCH] Write exact xlog position of timeline switch in the timeline history file. This allows us to do some more rigorous sanity checking for various incorrect point-in-time recovery scenarios, and provides more information for debugging purposes. It will also come in handy in the upcoming patch to allow timeline switches to be replicated by streaming replication. --- src/backend/access/transam/timeline.c | 144 +++++++++++++++++---- src/backend/access/transam/xlog.c | 175 ++++++++++++++++---------- src/include/access/timeline.h | 20 ++- 3 files changed, 248 insertions(+), 91 deletions(-) diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index 225ce465f7..324b6c1860 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -12,10 +12,10 @@ * * Each line in the file represents a timeline switch: * - * <parentTLI> <xlogfname> <reason> + * <parentTLI> <switchpoint> <reason> * * parentTLI ID of the parent timeline - * xlogfname filename of the WAL segment where the switch happened + * switchpoint XLogRecPtr of the WAL position where the switch happened * reason human-readable explanation of why the timeline was changed * * The fields are separated by tabs. 
Lines beginning with # are comments, and @@ -56,10 +56,18 @@ readTimeLineHistory(TimeLineID targetTLI) char histfname[MAXFNAMELEN]; char fline[MAXPGPATH]; FILE *fd; + TimeLineHistoryEntry *entry; + TimeLineID lasttli = 0; + XLogRecPtr prevend; /* Timeline 1 does not have a history file, so no need to check */ if (targetTLI == 1) - return list_make1_int((int) targetTLI); + { + entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry)); + entry->tli = targetTLI; + entry->begin = entry->end = InvalidXLogRecPtr; + return list_make1(entry); + } if (InArchiveRecovery) { @@ -77,7 +85,10 @@ readTimeLineHistory(TimeLineID targetTLI) (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", path))); /* Not there, so assume no parents */ - return list_make1_int((int) targetTLI); + entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry)); + entry->tli = targetTLI; + entry->begin = entry->end = InvalidXLogRecPtr; + return list_make1(entry); } result = NIL; @@ -85,12 +96,15 @@ readTimeLineHistory(TimeLineID targetTLI) /* * Parse the file... 
*/ + prevend = InvalidXLogRecPtr; while (fgets(fline, sizeof(fline), fd) != NULL) { /* skip leading whitespace and check for # comment */ char *ptr; - char *endptr; TimeLineID tli; + uint32 switchpoint_hi; + uint32 switchpoint_lo; + int nfields; for (ptr = fline; *ptr; ptr++) { @@ -100,38 +114,56 @@ readTimeLineHistory(TimeLineID targetTLI) if (*ptr == '\0' || *ptr == '#') continue; - /* expect a numeric timeline ID as first field of line */ - tli = (TimeLineID) strtoul(ptr, &endptr, 0); - if (endptr == ptr) + nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo); + + if (nfields < 1) + { + /* expect a numeric timeline ID as first field of line */ ereport(FATAL, (errmsg("syntax error in history file: %s", fline), errhint("Expected a numeric timeline ID."))); + } + if (nfields != 3) + ereport(FATAL, + (errmsg("syntax error in history file: %s", fline), + errhint("Expected an XLOG switchpoint location."))); - if (result && - tli <= (TimeLineID) linitial_int(result)) + if (result && tli <= lasttli) ereport(FATAL, (errmsg("invalid data in history file: %s", fline), errhint("Timeline IDs must be in increasing sequence."))); + lasttli = tli; + + entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry)); + entry->tli = tli; + entry->begin = prevend; + entry->end = ((uint64) (switchpoint_hi)) << 32 | (uint64) switchpoint_lo; + prevend = entry->end; + /* Build list with newest item first */ - result = lcons_int((int) tli, result); + result = lcons(entry, result); /* we ignore the remainder of each line */ } FreeFile(fd); - if (result && - targetTLI <= (TimeLineID) linitial_int(result)) + if (result && targetTLI <= lasttli) ereport(FATAL, (errmsg("invalid data in history file \"%s\"", path), errhint("Timeline IDs must be less than child timeline's ID."))); - result = lcons_int((int) targetTLI, result); + /* + * Create one more entry for the "tip" of the timeline, which has no + * entry in the history file. 
+ */ + entry = (TimeLineHistoryEntry *) palloc(sizeof(TimeLineHistoryEntry)); + entry->tli = targetTLI; + entry->begin = prevend; + entry->end = InvalidXLogRecPtr; - ereport(DEBUG3, - (errmsg_internal("history of timeline %u is %s", - targetTLI, nodeToString(result)))); + result = lcons(entry, result); return result; } @@ -214,7 +246,7 @@ findNewestTimeLine(TimeLineID startTLI) * * newTLI: ID of the new timeline * parentTLI: ID of its immediate parent - * endTLI et al: ID of the last used WAL file, for annotation purposes + * switchpoint: XLOG position where the system switched to the new timeline * reason: human-readable explanation of why the timeline was switched * * Currently this is only used at the end recovery, and so there are no locking @@ -223,12 +255,11 @@ findNewestTimeLine(TimeLineID startTLI) */ void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, - TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason) + XLogRecPtr switchpoint, char *reason) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; char histfname[MAXFNAMELEN]; - char xlogfname[MAXFNAMELEN]; char buffer[BLCKSZ]; int srcfd; int fd; @@ -313,13 +344,11 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, * If we did have a parent file, insert an extra newline just in case the * parent file failed to end with one. */ - XLogFileName(xlogfname, endTLI, endLogSegNo); - snprintf(buffer, sizeof(buffer), - "%s%u\t%s\t%s\n", + "%s%u\t%X/%X\t%s\n", (srcfd < 0) ? 
"" : "\n", parentTLI, - xlogfname, + (uint32) (switchpoint >> 32), (uint32) (switchpoint), reason); nbytes = strlen(buffer); @@ -380,3 +409,70 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, TLHistoryFileName(histfname, newTLI); XLogArchiveNotify(histfname); } + +/* + * Returns true if 'expectedTLEs' contains a timeline with id 'tli' + */ +bool +tliInHistory(TimeLineID tli, List *expectedTLEs) +{ + ListCell *cell; + + foreach(cell, expectedTLEs) + { + if (((TimeLineHistoryEntry *) lfirst(cell))->tli == tli) + return true; + } + + return false; +} + +/* + * Returns the ID of the timeline in use at a particular point in time, in + * the given timeline history. + */ +TimeLineID +tliOfPointInHistory(XLogRecPtr ptr, List *history) +{ + ListCell *cell; + + foreach(cell, history) + { + TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell); + if ((XLogRecPtrIsInvalid(tle->begin) || XLByteLE(tle->begin, ptr)) && + (XLogRecPtrIsInvalid(tle->end) || XLByteLT(ptr, tle->end))) + { + /* found it */ + return tle->tli; + } + } + + /* shouldn't happen. */ + elog(ERROR, "timeline history was not contiguous"); + return 0; /* keep compiler quiet */ +} + +/* + * Returns the point in history where we branched off the given timeline. + * Returns InvalidXLogRecPtr if the timeline is current (= we have not + * branched off from it), and throws an error if the timeline is not part of + * this server's history. 
+ */ +XLogRecPtr +tliSwitchPoint(TimeLineID tli, List *history) +{ + ListCell *cell; + + foreach (cell, history) + { + TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(cell); + + if (tle->tli == tli) + return tle->end; + } + + ereport(ERROR, + (errmsg("requested timeline %u is not in this server's history", + tli))); + return InvalidXLogRecPtr; /* keep compiler quiet */ +} diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b3356fd434..d60c2a3bfc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -226,7 +226,7 @@ static bool recoveryStopAfter; * * recoveryTargetIsLatest: was the requested target timeline 'latest'? * - * expectedTLIs: an integer list of recoveryTargetTLI and the TLIs of + * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and the timelines of * its known parents, newest first (so recoveryTargetTLI is always the * first list member). Only these TLIs are expected to be seen in the WAL * segments we read, and indeed only these TLIs will be considered as @@ -240,7 +240,7 @@ static bool recoveryStopAfter; */ static TimeLineID recoveryTargetTLI; static bool recoveryTargetIsLatest = false; -static List *expectedTLIs; +static List *expectedTLEs; static TimeLineID curFileTLI; /* @@ -2515,7 +2515,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, /* * Prefer link() to rename() here just to be really sure that we don't - * overwrite an existing logfile. However, there shouldn't be one, so + * overwrite an existing file. However, there shouldn't be one, so * rename() is an acceptable substitute except for the truly paranoid. */ #if HAVE_WORKING_LINK @@ -2716,7 +2716,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, /* * Open a logfile segment for reading (during recovery). * - * This version searches for the segment with any TLI listed in expectedTLIs. + * This version searches for the segment with any TLI listed in expectedTLEs. 
*/ static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source) @@ -2727,7 +2727,7 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source) /* * Loop looking for a suitable timeline ID: we might need to read any of - * the timelines listed in expectedTLIs. + * the timelines listed in expectedTLEs. * * We expect curFileTLI on entry to be the TLI of the preceding file in * sequence, or 0 if there was no predecessor. We do not allow curFileTLI @@ -2735,9 +2735,9 @@ XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source) * parent timeline extends to higher segment numbers than the child we * want to read. */ - foreach(cell, expectedTLIs) + foreach(cell, expectedTLEs) { - TimeLineID tli = (TimeLineID) lfirst_int(cell); + TimeLineID tli = ((TimeLineHistoryEntry *) lfirst(cell))->tli; if (tli < curFileTLI) break; /* don't bother looking at too-old TLIs */ @@ -3344,7 +3344,7 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt) /* * Since we are going to a random position in WAL, forget any prior * state about what timeline we were in, and allow it to be any - * timeline in expectedTLIs. We also set a flag to allow curFileTLI + * timeline in expectedTLEs. We also set a flag to allow curFileTLI * to go backwards (but we can't reset that variable right here, since * we might not change files at all). */ @@ -3675,7 +3675,7 @@ ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly) /* * Check page TLI is one of the expected values. 
*/ - if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli)) + if (!tliInHistory(hdr->xlp_tli, expectedTLEs)) { ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("unexpected timeline ID %u in log segment %s, offset %u", @@ -3812,57 +3812,86 @@ ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode, static bool rescanLatestTimeLine(void) { + List *newExpectedTLEs; + bool found; + ListCell *cell; TimeLineID newtarget; + TimeLineHistoryEntry *currentTle = NULL; + /* use volatile pointer to prevent code rearrangement */ + volatile XLogCtlData *xlogctl = XLogCtl; newtarget = findNewestTimeLine(recoveryTargetTLI); - if (newtarget != recoveryTargetTLI) + if (newtarget == recoveryTargetTLI) { - /* - * Determine the list of expected TLIs for the new TLI - */ - List *newExpectedTLIs; + /* No new timelines found */ + return false; + } - newExpectedTLIs = readTimeLineHistory(newtarget); + /* + * Determine the list of expected TLIs for the new TLI + */ - /* - * If the current timeline is not part of the history of the new - * timeline, we cannot proceed to it. - * - * XXX This isn't foolproof: The new timeline might have forked from - * the current one, but before the current recovery location. In that - * case we will still switch to the new timeline and proceed replaying - * from it even though the history doesn't match what we already - * replayed. That's not good. We will likely notice at the next online - * checkpoint, as the TLI won't match what we expected, but it's not - * guaranteed. The admin needs to make sure that doesn't happen. - */ - if (!list_member_int(newExpectedTLIs, - (int) recoveryTargetTLI)) - ereport(LOG, - (errmsg("new timeline %u is not a child of database system timeline %u", - newtarget, - ThisTimeLineID))); - else + newExpectedTLEs = readTimeLineHistory(newtarget); + + /* + * If the current timeline is not part of the history of the new + * timeline, we cannot proceed to it. 
+ */ + found = false; + foreach (cell, newExpectedTLEs) + { + currentTle = (TimeLineHistoryEntry *) lfirst(cell); + + if (currentTle->tli == recoveryTargetTLI) { - /* use volatile pointer to prevent code rearrangement */ - volatile XLogCtlData *xlogctl = XLogCtl; - - /* Switch target */ - recoveryTargetTLI = newtarget; - list_free(expectedTLIs); - expectedTLIs = newExpectedTLIs; - - SpinLockAcquire(&xlogctl->info_lck); - xlogctl->RecoveryTargetTLI = recoveryTargetTLI; - SpinLockRelease(&xlogctl->info_lck); - - ereport(LOG, - (errmsg("new target timeline is %u", - recoveryTargetTLI))); - return true; + found = true; + break; } } - return false; + if (!found) + { + ereport(LOG, + (errmsg("new timeline %u is not a child of database system timeline %u", + newtarget, + ThisTimeLineID))); + return false; + } + + /* + * The current timeline was found in the history file, but check that the + * next timeline was forked off from it *after* the current recovery + * location. + */ + if (XLByteLT(currentTle->end, EndRecPtr)) + { + ereport(LOG, + (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X", + newtarget, + ThisTimeLineID, + (uint32) (EndRecPtr >> 32), (uint32) EndRecPtr))); + return false; + } + + /* The new timeline history seems valid. Switch target */ + recoveryTargetTLI = newtarget; + list_free_deep(expectedTLEs); + expectedTLEs = newExpectedTLEs; + + SpinLockAcquire(&xlogctl->info_lck); + xlogctl->RecoveryTargetTLI = recoveryTargetTLI; + SpinLockRelease(&xlogctl->info_lck); + + ereport(LOG, + (errmsg("new target timeline is %u", + recoveryTargetTLI))); + + /* + * Wake up any walsenders to notice that we have a new target timeline. 
+ */ + if (AllowCascadeReplication()) + WalSndWakeup(); + + return true; } /* @@ -5300,26 +5329,41 @@ StartupXLOG(void) readRecoveryCommandFile(); /* Now we can determine the list of expected TLIs */ - expectedTLIs = readTimeLineHistory(recoveryTargetTLI); + expectedTLEs = readTimeLineHistory(recoveryTargetTLI); /* - * If pg_control's timeline is not in expectedTLIs, then we cannot - * proceed: the backup is not part of the history of the requested - * timeline. + * If the location of the checkpoint record is not on the expected + * timeline in the history of the requested timeline, we cannot proceed: + * the backup is not part of the history of the requested timeline. */ - if (!list_member_int(expectedTLIs, - (int) ControlFile->checkPointCopy.ThisTimeLineID)) + if (tliOfPointInHistory(ControlFile->checkPoint, expectedTLEs) != + ControlFile->checkPointCopy.ThisTimeLineID) + { + XLogRecPtr switchpoint; + + /* + * tliSwitchPoint will throw an error if the checkpoint's timeline + * is not in expectedTLEs at all. + */ + switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs); ereport(FATAL, - (errmsg("requested timeline %u is not a child of database system timeline %u", - recoveryTargetTLI, - ControlFile->checkPointCopy.ThisTimeLineID))); + (errmsg("requested timeline %u is not a child of this server's history", + recoveryTargetTLI), + errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X", + (uint32) (ControlFile->checkPoint >> 32), + (uint32) ControlFile->checkPoint, + ControlFile->checkPointCopy.ThisTimeLineID, + (uint32) (switchpoint >> 32), + (uint32) switchpoint))); + } /* * The min recovery point should be part of the requested timeline's * history, too. 
*/ if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) && - !list_member_int(expectedTLIs, ControlFile->minRecoveryPointTLI)) + tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) != + ControlFile->minRecoveryPointTLI) ereport(FATAL, (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u", recoveryTargetTLI, @@ -6026,8 +6070,8 @@ StartupXLOG(void) (errmsg("selected new timeline ID: %u", ThisTimeLineID))); /* - * Write comment to history file to explain why and where timeline - * changed. Comment varies according to the recovery target used. + * Create a comment for the history file to explain why and where + * timeline changed. */ if (recoveryTarget == RECOVERY_TARGET_XID) snprintf(reason, sizeof(reason), @@ -6047,7 +6091,7 @@ StartupXLOG(void) snprintf(reason, sizeof(reason), "no recovery target specified"); writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI, - curFileTLI, endLogSegNo, reason); + EndRecPtr, reason); } /* Save the selected TimeLineID in shared memory, too */ @@ -7916,8 +7960,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) * decrease. */ if (checkPoint.ThisTimeLineID < ThisTimeLineID || - !list_member_int(expectedTLIs, - (int) checkPoint.ThisTimeLineID)) + !tliInHistory(checkPoint.ThisTimeLineID, expectedTLEs)) ereport(PANIC, (errmsg("unexpected timeline ID %u (after %u) in checkpoint record", checkPoint.ThisTimeLineID, ThisTimeLineID))); diff --git a/src/include/access/timeline.h b/src/include/access/timeline.h index f2a7658bc4..785195bd36 100644 --- a/src/include/access/timeline.h +++ b/src/include/access/timeline.h @@ -14,10 +14,28 @@ #include "access/xlogdefs.h" #include "nodes/pg_list.h" +/* + * A list of these structs describes the timeline history of the server. Each + * TimeLineHistoryEntry represents a piece of WAL belonging to the history, + * from newest to oldest. All WAL positions between 'begin' and 'end' belong to + * the timeline represented by the entry. 
Together the 'begin' and 'end' + * pointers of all the entries form a contiguous line from beginning of time + * to infinity. + */ +typedef struct +{ + TimeLineID tli; + XLogRecPtr begin; /* inclusive */ + XLogRecPtr end; /* exclusive, 0 means infinity */ +} TimeLineHistoryEntry; + extern List *readTimeLineHistory(TimeLineID targetTLI); extern bool existsTimeLineHistory(TimeLineID probeTLI); extern TimeLineID findNewestTimeLine(TimeLineID startTLI); extern void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, - TimeLineID endTLI, XLogSegNo endLogSegNo, char *reason); + XLogRecPtr switchpoint, char *reason); +extern bool tliInHistory(TimeLineID tli, List *expectedTLIs); +extern TimeLineID tliOfPointInHistory(XLogRecPtr ptr, List *history); +extern XLogRecPtr tliSwitchPoint(TimeLineID tli, List *history); #endif /* TIMELINE_H */