Fix scenario where streaming standby gets stuck at a continuation record.

If a continuation record is split so that its first half has already been
removed from the master, and is only present in pg_wal, and there is a
recycled WAL segment in the standby server that looks like it would
contain the second half, recovery would get stuck. The code in
XLogPageRead() incorrectly started streaming at the beginning of the
WAL record, even if we had already read the first page.

Backpatch to 9.4. In principle, older versions have the same problem, but
without replication slots, there was no straightforward mechanism to
prevent the master from recycling old WAL that was still needed by standby.
Without such a mechanism, I think it's reasonable to assume that there's
enough slack in how many old segments are kept around to not run into this,
or you have a WAL archive.

Reported by Jonathon Nelson. Analysis and patch by Kyotaro HORIGUCHI, with
some extra comments by me.

Discussion: https://www.postgresql.org/message-id/CACJqAM3xVz0JY1XFDKPP%2BJoJAjoGx%3DGNuOAshEDWCext7BFvCQ%40mail.gmail.com
This commit is contained in:
Heikki Linnakangas 2018-05-05 01:34:53 +03:00
parent d2599ecfcc
commit 0668719801
3 changed files with 62 additions and 13 deletions

View File

@ -11694,6 +11694,40 @@ retry:
Assert(reqLen <= readLen);
*readTLI = curFileTLI;
/*
* Check the page header immediately, so that we can retry immediately if
* it's not valid. This may seem unnecessary, because XLogReadRecord()
* validates the page header anyway, and would propagate the failure up to
* ReadRecord(), which would retry. However, there's a corner case with
* continuation records, if a record is split across two pages such that
* we would need to read the two pages from different sources. For
* example, imagine a scenario where a streaming replica is started up,
* and replay reaches a record that's split across two WAL segments. The
* first page is only available locally, in pg_wal, because it's already
* been recycled in the master. The second page, however, is not present
* in pg_wal, and we should stream it from the master. There is a recycled
* WAL segment present in pg_wal, with garbage contents, however. We would
* read the first page from the local WAL segment, but when reading the
* second page, we would read the bogus, recycled, WAL segment. If we
* didn't catch that case here, we would never recover, because
* ReadRecord() would retry reading the whole record from the beginning.
*
* Of course, this only catches errors in the page header, which is what
* happens in the case of a recycled WAL segment. Other kinds of errors or
* corruption still has the same problem. But this at least fixes the
* common case, which can happen as part of normal operation.
*
* Validating the page header is cheap enough that doing it twice
* shouldn't be a big deal from a performance point of view.
*/
if (!XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
{
/* reset any error XLogReaderValidatePageHeader() might have set */
xlogreader->errormsg_buf[0] = '\0';
goto next_record_is_invalid;
}
return readLen;
next_record_is_invalid:
@ -11828,12 +11862,18 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
}
else
{
ptr = tliRecPtr;
ptr = RecPtr;
/*
* Use the record begin position to determine the
* TLI, rather than the position we're reading.
*/
tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
if (curFileTLI > 0 && tli < curFileTLI)
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
(uint32) (ptr >> 32), (uint32) ptr,
(uint32) (tliRecPtr >> 32),
(uint32) tliRecPtr,
tli, curFileTLI);
}
curFileTLI = tli;

View File

@ -27,8 +27,6 @@
static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
static bool ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogPageHeader hdr);
static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
@ -533,7 +531,6 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
*/
if (targetSegNo != state->readSegNo && targetPageOff != 0)
{
XLogPageHeader hdr;
XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
@ -545,9 +542,8 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/* we can be sure to have enough WAL available, we scrolled back */
Assert(readLen == XLOG_BLCKSZ);
hdr = (XLogPageHeader) state->readBuf;
if (!ValidXLogPageHeader(state, targetSegmentPtr, hdr))
if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
state->readBuf))
goto err;
}
@ -584,7 +580,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
/*
* Now that we know we have the full header, validate it.
*/
if (!ValidXLogPageHeader(state, pageptr, hdr))
if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
goto err;
/* update read state information */
@ -709,15 +705,19 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
}
/*
* Validate a page header
* Validate a page header.
*
* Check if 'phdr' is valid as the header of the XLog page at position
* 'recptr'.
*/
static bool
ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
XLogPageHeader hdr)
bool
XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
char *phdr)
{
XLogRecPtr recaddr;
XLogSegNo segno;
int32 offset;
XLogPageHeader hdr = (XLogPageHeader) phdr;
Assert((recptr % XLOG_BLCKSZ) == 0);
@ -805,6 +805,11 @@ ValidXLogPageHeader(XLogReaderState *state, XLogRecPtr recptr,
return false;
}
/*
* Check that the address on the page agrees with what we expected.
* This check typically fails when an old WAL segment is recycled,
* and hasn't yet been overwritten with new data yet.
*/
if (hdr->xlp_pageaddr != recaddr)
{
char fname[MAXFNAMELEN];

View File

@ -205,6 +205,10 @@ extern void XLogReaderFree(XLogReaderState *state);
extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
XLogRecPtr recptr, char **errormsg);
/* Validate a page */
extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
XLogRecPtr recptr, char *phdr);
/* Invalidate read state */
extern void XLogReaderInvalReadState(XLogReaderState *state);