From 49e928154978da2a5976628588fc545b726ad84a Mon Sep 17 00:00:00 2001 From: Simon Riggs Date: Thu, 27 Apr 2017 14:41:22 +0200 Subject: [PATCH] Rework handling of subtransactions in 2PC recovery The bug fixed by 0874d4f3e183757ba15a4b3f3bf563e0393dd9c2 caused us to question and rework the handling of subtransactions in 2PC during and at end of recovery. Patch adds checks and tests to ensure no further bugs. This effectively removes the temporary measure put in place by 546c13e11b29a5408b9d6a6e3cca301380b47f7f. Author: Simon Riggs Reviewed-by: Tom Lane, Michael Paquier Discussion: http://postgr.es/m/CANP8+j+vvXmruL_i2buvdhMeVv5TQu0Hm2+C5N+kdVwHJuor8w@mail.gmail.com --- src/backend/access/transam/subtrans.c | 32 +++++++++---- src/backend/access/transam/twophase.c | 67 +++++++++++---------------- src/backend/access/transam/xact.c | 2 +- src/backend/access/transam/xlog.c | 4 +- src/backend/storage/ipc/procarray.c | 2 +- src/include/access/subtrans.h | 2 +- src/include/access/twophase.h | 2 +- 7 files changed, 56 insertions(+), 55 deletions(-) diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 70828e5170..cc68484a5d 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -68,11 +68,9 @@ static bool SubTransPagePrecedes(int page1, int page2); /* * Record the parent of a subtransaction in the subtrans log. - * - * In some cases we may need to overwrite an existing value. */ void -SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) +SubTransSetParent(TransactionId xid, TransactionId parent) { int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); @@ -80,6 +78,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) TransactionId *ptr; Assert(TransactionIdIsValid(parent)); + Assert(TransactionIdFollows(xid, parent)); LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); @@ -87,13 +86,17 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; ptr += entryno; - /* Current state should be 0 */ - Assert(*ptr == InvalidTransactionId || - (*ptr == parent && overwriteOK)); - - *ptr = parent; - - SubTransCtl->shared->page_dirty[slotno] = true; + /* + * It's possible we'll try to set the parent xid multiple times + * but we shouldn't ever be changing the xid from one valid xid + * to another valid xid, which would corrupt the data structure. + */ + if (*ptr != parent) + { + Assert(*ptr == InvalidTransactionId); + *ptr = parent; + SubTransCtl->shared->page_dirty[slotno] = true; + } LWLockRelease(SubtransControlLock); } @@ -157,6 +160,15 @@ SubTransGetTopmostTransaction(TransactionId xid) if (TransactionIdPrecedes(parentXid, TransactionXmin)) break; parentXid = SubTransGetParent(parentXid); + + /* + * By convention the parent xid gets allocated first, so should + * always precede the child xid. Anything else points to a corrupted + * data structure that could lead to an infinite loop, so exit. + */ + if (!TransactionIdPrecedes(parentXid, previousXid)) + elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u", + previousXid, parentXid); } Assert(TransactionIdIsValid(previousXid)); diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index fa7124d903..c9fff42991 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -221,8 +221,7 @@ static void RemoveGXact(GlobalTransaction gxact); static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len); static char *ProcessTwoPhaseBuffer(TransactionId xid, XLogRecPtr prepare_start_lsn, - bool fromdisk, bool overwriteOK, bool setParent, - bool setNextXid); + bool fromdisk, bool setParent, bool setNextXid); static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); @@ -1743,8 +1742,7 @@ restoreTwoPhaseData(void) xid = (TransactionId) strtoul(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, - true, false, false, - false); + true, false, false); if (buf == NULL) continue; @@ -1804,8 +1802,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, false, false, - true); + gxact->ondisk, false, true); if (buf == NULL) continue; @@ -1858,12 +1855,12 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) * This is never called at the end of recovery - we use * RecoverPreparedTransactions() at that point. * - * Currently we simply call SubTransSetParent() for any subxids of prepared - * transactions. If overwriteOK is true, it's OK if some XIDs have already - * been marked in pg_subtrans. + * The lack of calls to SubTransSetParent() calls here is by design; + * those calls are made by RecoverPreparedTransactions() at the end of recovery + * for those xacts that need this. */ void -StandbyRecoverPreparedTransactions(bool overwriteOK) +StandbyRecoverPreparedTransactions(void) { int i; @@ -1880,8 +1877,7 @@ StandbyRecoverPreparedTransactions(bool overwriteOK) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, overwriteOK, true, - false); + gxact->ondisk, false, false); if (buf != NULL) pfree(buf); } @@ -1895,6 +1891,13 @@ StandbyRecoverPreparedTransactions(bool overwriteOK) * each prepared transaction (reacquire locks, etc). * * This is run during database startup. + * + * At the end of recovery the way we take snapshots will change. We now need + * to mark all running transactions with their full SubTransSetParent() info + * to allow normal snapshots to work correctly if snapshots overflow. + * We do this here because by definition prepared transactions are the only + * type of write transaction still running, so this is necessary and + * complete. */ void RecoverPreparedTransactions(void) @@ -1913,15 +1916,21 @@ RecoverPreparedTransactions(void) TwoPhaseFileHeader *hdr; TransactionId *subxids; const char *gid; - bool overwriteOK = false; - int i; xid = gxact->xid; + /* + * Reconstruct subtrans state for the transaction --- needed + * because pg_subtrans is not preserved over a restart. Note that + * we are linking all the subtransactions directly to the + * top-level XID; there may originally have been a more complex + * hierarchy, but there's no need to restore that exactly. + * It's possible that SubTransSetParent has been set before, if + * the prepared transaction generated xid assignment records. + */ buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, - gxact->ondisk, false, false, - false); + gxact->ondisk, true, false); if (buf == NULL) continue; @@ -1939,25 +1948,6 @@ RecoverPreparedTransactions(void) bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage)); - /* - * It's possible that SubTransSetParent has been set before, if - * the prepared transaction generated xid assignment records. Test - * here must match one used in AssignTransactionId(). - */ - if (InHotStandby && (hdr->nsubxacts >= PGPROC_MAX_CACHED_SUBXIDS || - XLogLogicalInfoActive())) - overwriteOK = true; - - /* - * Reconstruct subtrans state for the transaction --- needed - * because pg_subtrans is not preserved over a restart. Note that - * we are linking all the subtransactions directly to the - * top-level XID; there may originally have been a more complex - * hierarchy, but there's no need to restore that exactly. - */ - for (i = 0; i < hdr->nsubxacts; i++) - SubTransSetParent(subxids[i], xid, true); - /* * Recreate its GXACT and dummy PGPROC. But, check whether * it was added in redo and already has a shmem entry for @@ -2006,8 +1996,7 @@ RecoverPreparedTransactions(void) * Given a transaction id, read it either from disk or read it directly * via shmem xlog record pointer using the provided "prepare_start_lsn". * - * If setParent is true, then use the overwriteOK parameter to set up - * subtransaction parent linkages. + * If setParent is true, set up subtransaction parent linkages. * * If setNextXid is true, set ShmemVariableCache->nextXid to the newest * value scanned. @@ -2015,7 +2004,7 @@ RecoverPreparedTransactions(void) static char * ProcessTwoPhaseBuffer(TransactionId xid, XLogRecPtr prepare_start_lsn, - bool fromdisk, bool overwriteOK, + bool fromdisk, bool setParent, bool setNextXid) { TransactionId origNextXid = ShmemVariableCache->nextXid; @@ -2142,7 +2131,7 @@ ProcessTwoPhaseBuffer(TransactionId xid, } if (setParent) - SubTransSetParent(subxid, xid, overwriteOK); + SubTransSetParent(subxid, xid); } return buf; diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 92b263aea1..605639b0c3 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -559,7 +559,7 @@ AssignTransactionId(TransactionState s) XactTopTransactionId = s->transactionId; if (isSubXact) - SubTransSetParent(s->transactionId, s->parent->transactionId, false); + SubTransSetParent(s->transactionId, s->parent->transactionId); /* * If it's a top-level transaction, the predicate locking system needs to diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c7667879c6..a89d99838a 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6930,7 +6930,7 @@ StartupXLOG(void) ProcArrayApplyRecoveryInfo(&running); - StandbyRecoverPreparedTransactions(false); + StandbyRecoverPreparedTransactions(); } } @@ -9692,7 +9692,7 @@ xlog_redo(XLogReaderState *record) ProcArrayApplyRecoveryInfo(&running); - StandbyRecoverPreparedTransactions(true); + StandbyRecoverPreparedTransactions(); } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index ebf6a92923..4976bb03c7 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -943,7 +943,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, * have attempted to SubTransSetParent(). */ for (i = 0; i < nsubxids; i++) - SubTransSetParent(subxids[i], topxid, false); + SubTransSetParent(subxids[i], topxid); /* KnownAssignedXids isn't maintained yet, so we're done for now */ if (standbyState == STANDBY_INITIALIZED) diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h index bd30f5861f..847359873a 100644 --- a/src/include/access/subtrans.h +++ b/src/include/access/subtrans.h @@ -14,7 +14,7 @@ /* Number of SLRU buffers to use for subtrans */ #define NUM_SUBTRANS_BUFFERS 32 -extern void SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK); +extern void SubTransSetParent(TransactionId xid, TransactionId parent); extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 4d547c5553..80ec4ca4a5 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -46,7 +46,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid); extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p); -extern void StandbyRecoverPreparedTransactions(bool overwriteOK); +extern void StandbyRecoverPreparedTransactions(void); extern void RecoverPreparedTransactions(void); extern void CheckPointTwoPhase(XLogRecPtr redo_horizon);