diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 90ba32ef0f..3ac3b767dd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -626,9 +626,10 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, int source, bool notexistOk); static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source); static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, - int reqLen, char *readBuf, TimeLineID *readTLI); + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *readTLI); static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt); + bool fetching_ckpt, XLogRecPtr tliRecPtr); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); static void PreallocXlogFiles(XLogRecPtr endptr); @@ -8832,7 +8833,7 @@ CancelBackup(void) */ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, - char *readBuf, TimeLineID *readTLI) + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI) { XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; @@ -8880,7 +8881,8 @@ retry: { if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen, private->randAccess, - private->fetching_ckpt)) + private->fetching_ckpt, + targetRecPtr)) goto triggered; } /* In archive or crash recovery. */ @@ -8980,11 +8982,19 @@ triggered: } /* - * In standby mode, wait for the requested record to become available, either + * In standby mode, wait for WAL at position 'RecPtr' to become available, either * via restore_command succeeding to restore the segment, or via walreceiver * having streamed the record (or via someone copying the segment directly to * pg_xlog, but that is not documented or recommended). * + * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should + * prepare to read WAL starting from RedoStartLSN after this. 
+ * + * 'RecPtr' might not point to the beginning of the record we're interested + * in; it might also point to the page or segment header. In that case, + * 'tliRecPtr' is the position of the WAL record we're interested in. It is + * used to decide which timeline to stream the requested WAL from. + * * When the requested record becomes available, the function opens the file * containing it (if not open already), and returns true. When end of standby * mode is triggered by the user, and there is no more WAL available, returns @@ -8992,7 +9002,7 @@ triggered: */ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt) + bool fetching_ckpt, XLogRecPtr tliRecPtr) { static pg_time_t last_fail_time = 0; pg_time_t now; @@ -9076,7 +9086,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, else { ptr = RecPtr; - tli = tliOfPointInHistory(ptr, expectedTLEs); + tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); if (curFileTLI > 0 && tli < curFileTLI) elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 9499f848b0..a358a3d40a 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -216,6 +216,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) randAccess = true; /* allow readPageTLI to go backwards too */ } + state->currRecPtr = RecPtr; + targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ); targetRecOff = RecPtr % XLOG_BLCKSZ; @@ -503,6 +505,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ, + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; @@ -521,6 +524,7 @@ 
ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) * so that we can validate it. */ readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD), + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; @@ -539,6 +543,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) if (readLen < XLogPageHeaderSize(hdr)) { readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr), + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 36907d6330..3829ce2ab1 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -27,6 +27,7 @@ typedef struct XLogReaderState XLogReaderState; typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI); @@ -46,11 +47,17 @@ struct XLogReaderState * -1 on failure. The callback shall sleep, if necessary, to wait for the * requested bytes to become available. The callback will not be invoked * again for the same page unless more than the returned number of bytes - * are necessary. + * are needed. * - * *pageTLI should be set to the TLI of the file the page was read from. - * It is currently used only for error reporting purposes, to reconstruct - * the name of the WAL file where an error occurred. + * targetRecPtr is the position of the WAL record we're reading. Usually + * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs + * to read and verify the page or segment header, before it reads the + * actual WAL record it's interested in. In that case, targetRecPtr can + * be used to determine which timeline to read the page from. + * + * The callback shall set *pageTLI to the TLI of the file the page was + * read from. 
It is currently used only for error reporting purposes, to + * reconstruct the name of the WAL file where an error occurred. */ XLogPageReadCB read_page; @@ -90,6 +97,9 @@ struct XLogReaderState XLogRecPtr latestPagePtr; TimeLineID latestPageTLI; + /* beginning of the WAL record being read. */ + XLogRecPtr currRecPtr; + /* Buffer for current ReadRecord result (expandable) */ char *readRecordBuf; uint32 readRecordBufSize;