Speed up 2PC recovery by skipping two-phase state files in the normal path

2PC state info is held in shmem at PREPARE, then cleaned up at COMMIT PREPARED/ABORT PREPARED,
which avoids writing/fsyncing any state information to disk in the normal path and greatly enhances replay speed.
Prepared transactions that live past one checkpoint redo horizon will still be written to disk, as they are now.
This is conceptually similar to 978b2f65aa and builds upon
the infrastructure created by that commit.

Authors, in equal measure: Stas Kelvich, Nikhil Sontakke and Michael Paquier
Discussion: https://postgr.es/m/CAMGcDxf8Bn9ZPBBJZba9wiyQq-Qk5uqq=VjoMnRnW5s+fKST3w@mail.gmail.com
This commit is contained in:
Simon Riggs 2017-04-04 15:56:56 -04:00
parent 60a0b2ec89
commit 728bd991c3
4 changed files with 564 additions and 320 deletions

File diff suppressed because it is too large Load Diff

View File

@ -5615,7 +5615,9 @@ xact_redo(XLogReaderState *record)
Assert(TransactionIdIsValid(parsed.twophase_xid));
xact_redo_commit(&parsed, parsed.twophase_xid,
record->EndRecPtr, XLogRecGetOrigin(record));
RemoveTwoPhaseFile(parsed.twophase_xid, false);
/* Delete TwoPhaseState gxact entry and/or 2PC file. */
PrepareRedoRemove(parsed.twophase_xid, false);
}
}
else if (info == XLOG_XACT_ABORT || info == XLOG_XACT_ABORT_PREPARED)
@ -5635,14 +5637,20 @@ xact_redo(XLogReaderState *record)
{
Assert(TransactionIdIsValid(parsed.twophase_xid));
xact_redo_abort(&parsed, parsed.twophase_xid);
RemoveTwoPhaseFile(parsed.twophase_xid, false);
/* Delete TwoPhaseState gxact entry and/or 2PC file. */
PrepareRedoRemove(parsed.twophase_xid, false);
}
}
else if (info == XLOG_XACT_PREPARE)
{
/* the record contents are exactly the 2PC file */
RecreateTwoPhaseFile(XLogRecGetXid(record),
XLogRecGetData(record), XLogRecGetDataLen(record));
/*
* Store xid and start/end pointers of the WAL record in
* TwoPhaseState gxact entry.
*/
PrepareRedoAdd(XLogRecGetData(record),
record->ReadRecPtr,
record->EndRecPtr);
}
else if (info == XLOG_XACT_ASSIGNMENT)
{

View File

@ -6696,6 +6696,16 @@ StartupXLOG(void)
*/
restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI);
/*
* Before running in recovery, scan pg_twophase and fill in its status
* to be able to work on entries generated by redo. Doing a scan before
* taking any recovery action has the merit of discarding any 2PC files
* that are newer than the first record to replay, avoiding any conflicts
* at replay. It also avoids any subsequent scans of the on-disk
* two-phase data during recovery.
*/
restoreTwoPhaseData();
lastFullPageWrites = checkPoint.fullPageWrites;
RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;

View File

@ -49,11 +49,12 @@ extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
extern void StandbyRecoverPreparedTransactions(bool overwriteOK);
extern void RecoverPreparedTransactions(void);
extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
extern void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
extern void CheckPointTwoPhase(XLogRecPtr redo_horizon);
extern void FinishPreparedTransaction(const char *gid, bool isCommit);
extern void PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
XLogRecPtr end_lsn);
extern void PrepareRedoRemove(TransactionId xid, bool giveWarning);
extern void restoreTwoPhaseData(void);
#endif /* TWOPHASE_H */