Skip WAL recycling and preallocation during archive recovery.

The previous commit addressed the chief consequences of a race condition
between InstallXLogFileSegment() and KeepFileRestoredFromArchive().  Fix
three lesser consequences.  A spurious durable_rename_excl() LOG message
remained possible.  KeepFileRestoredFromArchive() wasted the proceeds of
WAL recycling and preallocation.  Finally, XLogFileInitInternal() could
return a descriptor for a file that KeepFileRestoredFromArchive() had
already unlinked.  That felt like a recipe for future bugs.

Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com
This commit is contained in:
Noah Misch 2021-06-28 18:34:56 -07:00
parent 2b3e4672f7
commit cc2c7d65fc
1 changed files with 57 additions and 8 deletions

View File

@ -662,6 +662,16 @@ typedef struct XLogCtlData
*/
bool SharedHotStandbyActive;
/*
* InstallXLogFileSegmentActive indicates whether the checkpointer should
* arrange for future segments by recycling and/or PreallocXlogFiles().
* Protected by ControlFileLock. Only the startup process changes it. If
* true, anyone can use InstallXLogFileSegment(). If false, the startup
* process owns the exclusive right to install segments, by reading from
* the archive and possibly replacing existing files.
*/
bool InstallXLogFileSegmentActive;
/*
* SharedPromoteIsTriggered indicates if a standby promotion has been
* triggered. Protected by info_lck.
@ -921,6 +931,7 @@ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt, XLogRecPtr tliRecPtr);
static void XLogShutdownWalRcv(void);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void);
static void PreallocXlogFiles(XLogRecPtr endptr);
@ -3625,8 +3636,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
* is false.)
*
* Returns true if the file was installed successfully. false indicates that
* max_segno limit was exceeded, or an error occurred while renaming the
* file into place.
* max_segno limit was exceeded, the startup process has disabled this
* function for now, or an error occurred while renaming the file into place.
*/
static bool
InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@ -3638,6 +3649,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
if (!XLogCtl->InstallXLogFileSegmentActive)
{
LWLockRelease(ControlFileLock);
return false;
}
if (!find_free)
{
@ -3745,6 +3761,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
*/
if (source == XLOG_FROM_ARCHIVE)
{
Assert(!XLogCtl->InstallXLogFileSegmentActive);
KeepFileRestoredFromArchive(path, xlogfname);
/*
@ -3946,6 +3963,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
char path[MAXPGPATH];
uint64 offset;
if (!XLogCtl->InstallXLogFileSegmentActive)
return; /* unlocked check says no */
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
if (offset >= (uint32) (0.75 * wal_segment_size))
@ -4227,6 +4247,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
*/
if (wal_recycle &&
*endlogSegNo <= recycleSegNo &&
XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
InstallXLogFileSegment(endlogSegNo, path,
true, recycleSegNo))
@ -4240,7 +4261,7 @@ RemoveXlogFile(const char *segname, XLogSegNo recycleSegNo,
}
else
{
/* No need for any more future segments... */
/* No need for any more future segments, or recycling failed ... */
int rc;
ereport(DEBUG2,
@ -5226,6 +5247,7 @@ XLOGShmemInit(void)
XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
XLogCtl->SharedHotStandbyActive = false;
XLogCtl->InstallXLogFileSegmentActive = false;
XLogCtl->SharedPromoteIsTriggered = false;
XLogCtl->WalWriterSleeping = false;
@ -5253,6 +5275,11 @@ BootStrapXLOG(void)
struct timeval tv;
pg_crc32c crc;
/* allow ordinary WAL segment creation, like StartupXLOG() would */
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
/*
* Select a hopefully-unique system identifier code for this installation.
* We use the result of gettimeofday(), including the fractional seconds
@ -7619,7 +7646,7 @@ StartupXLOG(void)
* the startup checkpoint record. It will trump over the checkpoint and
* subsequent records if it's still alive when we start writing WAL.
*/
ShutdownWalRcv();
XLogShutdownWalRcv();
/*
* Reset unlogged relations to the contents of their INIT fork. This is
@ -7644,7 +7671,7 @@ StartupXLOG(void)
* recovery, e.g., timeline history file) from archive or pg_wal.
*
* Note that standby mode must be turned off after killing WAL receiver,
* i.e., calling ShutdownWalRcv().
* i.e., calling XLogShutdownWalRcv().
*/
Assert(!WalRcvStreaming());
StandbyMode = false;
@ -7709,6 +7736,14 @@ StartupXLOG(void)
*/
oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
/*
* Allow ordinary WAL segment creation before any exitArchiveRecovery(),
* which sometimes creates a segment, and after the last ReadRecord().
*/
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
/*
* Consider whether we need to assign a new timeline ID.
*
@ -12378,7 +12413,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*/
if (StandbyMode && CheckForStandbyTrigger())
{
ShutdownWalRcv();
XLogShutdownWalRcv();
return false;
}
@ -12426,7 +12461,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* WAL that we restore from archive.
*/
if (WalRcvStreaming())
ShutdownWalRcv();
XLogShutdownWalRcv();
/*
* Before we sleep, re-scan for possible new timelines if
@ -12553,7 +12588,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
*/
if (pendingWalRcvRestart && !startWalReceiver)
{
ShutdownWalRcv();
XLogShutdownWalRcv();
/*
* Re-scan for possible new timelines if we were
@ -12603,6 +12638,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
tli, curFileTLI);
}
curFileTLI = tli;
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = true;
LWLockRelease(ControlFileLock);
RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
PrimarySlotName,
wal_receiver_create_temp_slot);
@ -12770,6 +12808,17 @@ StartupRequestWalReceiverRestart(void)
}
}
/* Thin wrapper around ShutdownWalRcv(). */
static void
XLogShutdownWalRcv(void)
{
ShutdownWalRcv();
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
XLogCtl->InstallXLogFileSegmentActive = false;
LWLockRelease(ControlFileLock);
}
/*
* Determine what log level should be used to report a corrupt WAL record
* in the current WAL page, previously read by XLogPageRead().