diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 356a2f0c4c..e930731b16 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1017,6 +1017,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser OldSnapshotTimeMapLock Waiting to read or update old snapshot control information. + + CLogTruncationLock + Waiting to truncate the transaction log or waiting for transaction log truncation to finish. + clog Waiting for I/O on a clog (transaction status) buffer. diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c index 352de48dbe..ef268c5ab3 100644 --- a/src/backend/access/rmgrdesc/clogdesc.c +++ b/src/backend/access/rmgrdesc/clogdesc.c @@ -23,12 +23,20 @@ clog_desc(StringInfo buf, XLogReaderState *record) char *rec = XLogRecGetData(record); uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; - if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE) + if (info == CLOG_ZEROPAGE) { int pageno; memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + appendStringInfo(buf, "page %d", pageno); + } + else if (info == CLOG_TRUNCATE) + { + xl_clog_truncate xlrec; + + memcpy(&xlrec, rec, sizeof(xl_clog_truncate)); + appendStringInfo(buf, "page %d; oldestXact %u", + xlrec.pageno, xlrec.oldestXact); } } diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 5b1d13dac1..2d33510930 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -83,7 +83,8 @@ static SlruCtlData ClogCtlData; static int ZeroCLOGPage(int pageno, bool writeXlog); static bool CLOGPagePrecedes(int page1, int page2); static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno); +static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact, + Oid oldestXidDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn, int pageno); @@ -640,7 +641,7 @@ ExtendCLOG(TransactionId newestXact) * the XLOG flush unless we have confirmed that there is a removable segment. */ void -TruncateCLOG(TransactionId oldestXact) +TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) { int cutoffPage; @@ -654,8 +655,26 @@ TruncateCLOG(TransactionId oldestXact) if (!SlruScanDirectory(ClogCtl, SlruScanDirCbReportPresence, &cutoffPage)) return; /* nothing to remove */ - /* Write XLOG record and flush XLOG to disk */ - WriteTruncateXlogRec(cutoffPage); + /* + * Advance oldestClogXid before truncating clog, so concurrent xact status + * lookups can ensure they don't attempt to access truncated-away clog. + * + * It's only necessary to do this if we will actually truncate away clog + * pages. + */ + AdvanceOldestClogXid(oldestXact); + + /* vac_truncate_clog already advanced oldestXid */ + Assert(TransactionIdPrecedesOrEquals(oldestXact, + ShmemVariableCache->oldestXid)); + + /* + * Write XLOG record and flush XLOG to disk. We record the oldest xid we're + * keeping information about here so we can ensure that it's always ahead + * of clog truncation in case we crash, and so a standby finds out the new + * valid xid before the next checkpoint. + */ + WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid); /* Now we can remove the old CLOG segment(s) */ SimpleLruTruncate(ClogCtl, cutoffPage); @@ -704,12 +723,17 @@ WriteZeroPageXlogRec(int pageno) * in TruncateCLOG(). */ static void -WriteTruncateXlogRec(int pageno) +WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb) { XLogRecPtr recptr; + xl_clog_truncate xlrec; + + xlrec.pageno = pageno; + xlrec.oldestXact = oldestXact; + xlrec.oldestXactDb = oldestXactDb; XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&xlrec), sizeof(xl_clog_truncate)); recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE); XLogFlush(recptr); } @@ -742,17 +766,19 @@ clog_redo(XLogReaderState *record) } else if (info == CLOG_TRUNCATE) { - int pageno; + xl_clog_truncate xlrec; - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_clog_truncate)); /* * During XLOG replay, latest_page_number isn't set up yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ - ClogCtl->shared->latest_page_number = pageno; + ClogCtl->shared->latest_page_number = xlrec.pageno; - SimpleLruTruncate(ClogCtl, pageno); + AdvanceOldestClogXid(xlrec.oldestXact); + + SimpleLruTruncate(ClogCtl, xlrec.pageno); } else elog(PANIC, "clog_redo: unknown op code %u", info); diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index b91a259e80..562b53be9a 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -119,7 +119,7 @@ TransactionLogFetch(TransactionId transactionId) * True iff transaction associated with the identifier did commit. * * Note: - * Assumes transaction identifier is valid. + * Assumes transaction identifier is valid and exists in clog. */ bool /* true if given transaction committed */ TransactionIdDidCommit(TransactionId transactionId) @@ -175,7 +175,7 @@ TransactionIdDidCommit(TransactionId transactionId) * True iff transaction associated with the identifier did abort. * * Note: - * Assumes transaction identifier is valid. + * Assumes transaction identifier is valid and exists in clog. */ bool /* true if given transaction aborted */ TransactionIdDidAbort(TransactionId transactionId) diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 42fc351f7b..5efbfbd3d6 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -259,7 +259,28 @@ ReadNewTransactionId(void) } /* - * Determine the last safe XID to allocate given the currently oldest + * Advance the cluster-wide value for the oldest valid clog entry. + * + * We must acquire CLogTruncationLock to advance the oldestClogXid. It's not + * necessary to hold the lock during the actual clog truncation, only when we + * advance the limit, as code looking up arbitrary xids is required to hold + * CLogTruncationLock from when it tests oldestClogXid through to when it + * completes the clog lookup. + */ +void +AdvanceOldestClogXid(TransactionId oldest_datfrozenxid) +{ + LWLockAcquire(CLogTruncationLock, LW_EXCLUSIVE); + if (TransactionIdPrecedes(ShmemVariableCache->oldestClogXid, + oldest_datfrozenxid)) + { + ShmemVariableCache->oldestClogXid = oldest_datfrozenxid; + } + LWLockRelease(CLogTruncationLock); +} + +/* + * Determine the last safe XID to allocate using the currently oldest * datfrozenxid (ie, the oldest XID that might exist in any database * of our cluster), and the OID of the (or a) database with that value. */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index de1937e013..b99ded5df6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5016,6 +5016,7 @@ BootStrapXLOG(void) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true); SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId); @@ -6622,6 +6623,7 @@ StartupXLOG(void) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); + AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true); SetCommitTsLimit(checkPoint.oldestCommitTsXid, @@ -8687,6 +8689,11 @@ CreateCheckPoint(int flags) /* * Get the other info we need for the checkpoint record. + * + * We don't need to save oldestClogXid in the checkpoint, it only matters + * for the short period in which clog is being truncated, and if we crash + * during that we'll redo the clog truncation and fix up oldestClogXid + * there. */ LWLockAcquire(XidGenLock, LW_SHARED); checkPoint.nextXid = ShmemVariableCache->nextXid; @@ -9616,6 +9623,10 @@ xlog_redo(XLogReaderState *record) MultiXactAdvanceOldest(checkPoint.oldestMulti, checkPoint.oldestMultiDB); + /* + * No need to set oldestClogXid here as well; it'll be set when we + * redo an xl_clog_truncate if it changed since initialization. + */ SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index d8ae3e1e43..9fbb0eb4eb 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1194,7 +1194,7 @@ vac_truncate_clog(TransactionId frozenXID, /* * Truncate CLOG, multixact and CommitTs to the oldest computed value. */ - TruncateCLOG(frozenXID); + TruncateCLOG(frozenXID, oldestxid_datoid); TruncateCommitTs(frozenXID); TruncateMultiXact(minMulti, minmulti_datoid); diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index cd8b08f50d..e6025ecedb 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -49,3 +49,4 @@ MultiXactTruncationLock 41 OldSnapshotTimeMapLock 42 BackendRandomLock 43 LogicalRepWorkerLock 44 +CLogTruncationLock 45 diff --git a/src/include/access/clog.h b/src/include/access/clog.h index 2894bd5620..60a9e11a0f 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -28,6 +28,12 @@ typedef int XidStatus; #define TRANSACTION_STATUS_ABORTED 0x02 #define TRANSACTION_STATUS_SUB_COMMITTED 0x03 +typedef struct xl_clog_truncate +{ + int pageno; + TransactionId oldestXact; + Oid oldestXactDb; +} xl_clog_truncate; extern void TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn); @@ -42,7 +48,7 @@ extern void TrimCLOG(void); extern void ShutdownCLOG(void); extern void CheckPointCLOG(void); extern void ExtendCLOG(TransactionId newestXact); -extern void TruncateCLOG(TransactionId oldestXact); +extern void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid); /* XLOG stuff */ #define CLOG_ZEROPAGE 0x00 diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 522c104073..d25a2dd207 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -134,6 +134,12 @@ typedef struct VariableCacheData */ TransactionId latestCompletedXid; /* newest XID that has committed or * aborted */ + + /* + * These fields are protected by CLogTruncationLock + */ + TransactionId oldestClogXid; /* oldest it's safe to look up in clog */ + } VariableCacheData; typedef VariableCacheData *VariableCache; @@ -173,6 +179,7 @@ extern TransactionId GetNewTransactionId(bool isSubXact); extern TransactionId ReadNewTransactionId(void); extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid); +extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); extern bool ForceTransactionIdLimitUpdate(void); extern Oid GetNewObjectId(void); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 7957cab98c..c09c0f8e5f 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD096 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD097 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData {