Rework handling of subtransactions in 2PC recovery

The bug fixed by 0874d4f3e1
caused us to question and rework the handling of
subtransactions in 2PC during and at end of recovery.
Patch adds checks and tests to ensure no further bugs.

This effectively removes the temporary measure put in place
by 546c13e11b.

Author: Simon Riggs
Reviewed-by: Tom Lane, Michael Paquier
Discussion: http://postgr.es/m/CANP8+j+vvXmruL_i2buvdhMeVv5TQu0Hm2+C5N+kdVwHJuor8w@mail.gmail.com
This commit is contained in:
Simon Riggs 2017-04-27 14:41:22 +02:00
parent 0352c15e5a
commit 49e9281549
7 changed files with 56 additions and 55 deletions

View File

@ -68,11 +68,9 @@ static bool SubTransPagePrecedes(int page1, int page2);
/*
* Record the parent of a subtransaction in the subtrans log.
*
* In some cases we may need to overwrite an existing value.
*/
void
SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
SubTransSetParent(TransactionId xid, TransactionId parent)
{
int pageno = TransactionIdToPage(xid);
int entryno = TransactionIdToEntry(xid);
@ -80,6 +78,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
TransactionId *ptr;
Assert(TransactionIdIsValid(parent));
Assert(TransactionIdFollows(xid, parent));
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
@ -87,13 +86,17 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
ptr += entryno;
/* Current state should be 0 */
Assert(*ptr == InvalidTransactionId ||
(*ptr == parent && overwriteOK));
*ptr = parent;
SubTransCtl->shared->page_dirty[slotno] = true;
/*
* It's possible we'll try to set the parent xid multiple times
* but we shouldn't ever be changing the xid from one valid xid
* to another valid xid, which would corrupt the data structure.
*/
if (*ptr != parent)
{
Assert(*ptr == InvalidTransactionId);
*ptr = parent;
SubTransCtl->shared->page_dirty[slotno] = true;
}
LWLockRelease(SubtransControlLock);
}
@ -157,6 +160,15 @@ SubTransGetTopmostTransaction(TransactionId xid)
if (TransactionIdPrecedes(parentXid, TransactionXmin))
break;
parentXid = SubTransGetParent(parentXid);
/*
* By convention the parent xid gets allocated first, so should
* always precede the child xid. Anything else points to a corrupted
* data structure that could lead to an infinite loop, so exit.
*/
if (!TransactionIdPrecedes(parentXid, previousXid))
elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
previousXid, parentXid);
}
Assert(TransactionIdIsValid(previousXid));

View File

@ -221,8 +221,7 @@ static void RemoveGXact(GlobalTransaction gxact);
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
static char *ProcessTwoPhaseBuffer(TransactionId xid,
XLogRecPtr prepare_start_lsn,
bool fromdisk, bool overwriteOK, bool setParent,
bool setNextXid);
bool fromdisk, bool setParent, bool setNextXid);
static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
const char *gid, TimestampTz prepared_at, Oid owner,
Oid databaseid);
@ -1743,8 +1742,7 @@ restoreTwoPhaseData(void)
xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
true, false, false,
false);
true, false, false);
if (buf == NULL)
continue;
@ -1804,8 +1802,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, false, false,
true);
gxact->ondisk, false, true);
if (buf == NULL)
continue;
@ -1858,12 +1855,12 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
* This is never called at the end of recovery - we use
* RecoverPreparedTransactions() at that point.
*
* Currently we simply call SubTransSetParent() for any subxids of prepared
* transactions. If overwriteOK is true, it's OK if some XIDs have already
* been marked in pg_subtrans.
* The lack of calls to SubTransSetParent() calls here is by design;
* those calls are made by RecoverPreparedTransactions() at the end of recovery
* for those xacts that need this.
*/
void
StandbyRecoverPreparedTransactions(bool overwriteOK)
StandbyRecoverPreparedTransactions(void)
{
int i;
@ -1880,8 +1877,7 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, overwriteOK, true,
false);
gxact->ondisk, false, false);
if (buf != NULL)
pfree(buf);
}
@ -1895,6 +1891,13 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
* each prepared transaction (reacquire locks, etc).
*
* This is run during database startup.
*
* At the end of recovery the way we take snapshots will change. We now need
* to mark all running transactions with their full SubTransSetParent() info
* to allow normal snapshots to work correctly if snapshots overflow.
* We do this here because by definition prepared transactions are the only
* type of write transaction still running, so this is necessary and
* complete.
*/
void
RecoverPreparedTransactions(void)
@ -1913,15 +1916,21 @@ RecoverPreparedTransactions(void)
TwoPhaseFileHeader *hdr;
TransactionId *subxids;
const char *gid;
bool overwriteOK = false;
int i;
xid = gxact->xid;
/*
* Reconstruct subtrans state for the transaction --- needed
* because pg_subtrans is not preserved over a restart. Note that
* we are linking all the subtransactions directly to the
* top-level XID; there may originally have been a more complex
* hierarchy, but there's no need to restore that exactly.
* It's possible that SubTransSetParent has been set before, if
* the prepared transaction generated xid assignment records.
*/
buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, false, false,
false);
gxact->ondisk, true, false);
if (buf == NULL)
continue;
@ -1939,25 +1948,6 @@ RecoverPreparedTransactions(void)
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
/*
* It's possible that SubTransSetParent has been set before, if
* the prepared transaction generated xid assignment records. Test
* here must match one used in AssignTransactionId().
*/
if (InHotStandby && (hdr->nsubxacts >= PGPROC_MAX_CACHED_SUBXIDS ||
XLogLogicalInfoActive()))
overwriteOK = true;
/*
* Reconstruct subtrans state for the transaction --- needed
* because pg_subtrans is not preserved over a restart. Note that
* we are linking all the subtransactions directly to the
* top-level XID; there may originally have been a more complex
* hierarchy, but there's no need to restore that exactly.
*/
for (i = 0; i < hdr->nsubxacts; i++)
SubTransSetParent(subxids[i], xid, true);
/*
* Recreate its GXACT and dummy PGPROC. But, check whether
* it was added in redo and already has a shmem entry for
@ -2006,8 +1996,7 @@ RecoverPreparedTransactions(void)
* Given a transaction id, read it either from disk or read it directly
* via shmem xlog record pointer using the provided "prepare_start_lsn".
*
* If setParent is true, then use the overwriteOK parameter to set up
* subtransaction parent linkages.
* If setParent is true, set up subtransaction parent linkages.
*
* If setNextXid is true, set ShmemVariableCache->nextXid to the newest
* value scanned.
@ -2015,7 +2004,7 @@ RecoverPreparedTransactions(void)
static char *
ProcessTwoPhaseBuffer(TransactionId xid,
XLogRecPtr prepare_start_lsn,
bool fromdisk, bool overwriteOK,
bool fromdisk,
bool setParent, bool setNextXid)
{
TransactionId origNextXid = ShmemVariableCache->nextXid;
@ -2142,7 +2131,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
}
if (setParent)
SubTransSetParent(subxid, xid, overwriteOK);
SubTransSetParent(subxid, xid);
}
return buf;

View File

@ -559,7 +559,7 @@ AssignTransactionId(TransactionState s)
XactTopTransactionId = s->transactionId;
if (isSubXact)
SubTransSetParent(s->transactionId, s->parent->transactionId, false);
SubTransSetParent(s->transactionId, s->parent->transactionId);
/*
* If it's a top-level transaction, the predicate locking system needs to

View File

@ -6930,7 +6930,7 @@ StartupXLOG(void)
ProcArrayApplyRecoveryInfo(&running);
StandbyRecoverPreparedTransactions(false);
StandbyRecoverPreparedTransactions();
}
}
@ -9692,7 +9692,7 @@ xlog_redo(XLogReaderState *record)
ProcArrayApplyRecoveryInfo(&running);
StandbyRecoverPreparedTransactions(true);
StandbyRecoverPreparedTransactions();
}
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */

View File

@ -943,7 +943,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
* have attempted to SubTransSetParent().
*/
for (i = 0; i < nsubxids; i++)
SubTransSetParent(subxids[i], topxid, false);
SubTransSetParent(subxids[i], topxid);
/* KnownAssignedXids isn't maintained yet, so we're done for now */
if (standbyState == STANDBY_INITIALIZED)

View File

@ -14,7 +14,7 @@
/* Number of SLRU buffers to use for subtrans */
#define NUM_SUBTRANS_BUFFERS 32
extern void SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK);
extern void SubTransSetParent(TransactionId xid, TransactionId parent);
extern TransactionId SubTransGetParent(TransactionId xid);
extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);

View File

@ -46,7 +46,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
int *nxids_p);
extern void StandbyRecoverPreparedTransactions(bool overwriteOK);
extern void StandbyRecoverPreparedTransactions(void);
extern void RecoverPreparedTransactions(void);
extern void CheckPointTwoPhase(XLogRecPtr redo_horizon);