diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 7d1b219bbc..97260201dc 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -882,9 +882,27 @@ gistNewBuffer(Relation r)
 bool
 gistPageRecyclable(Page page)
 {
-	return PageIsNew(page) ||
-		(GistPageIsDeleted(page) &&
-		 TransactionIdPrecedes(GistPageGetDeleteXid(page), RecentGlobalXmin));
+	if (PageIsNew(page))
+		return true;
+	if (GistPageIsDeleted(page))
+	{
+		/*
+		 * The page was deleted, but when? If it was just deleted, a scan
+		 * might have seen the downlink to it, and will read the page later.
+		 * As long as that can happen, we must keep the deleted page around as
+		 * a tombstone.
+		 *
+		 * Compare the deletion XID with RecentGlobalXmin. If deleteXid <
+		 * RecentGlobalXmin, then no scan that's still in progress could have
+		 * seen its downlink, and we can recycle it.
+		 */
+		FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
+		FullTransactionId recentxmin_full = GetFullRecentGlobalXmin();
+
+		if (FullTransactionIdPrecedes(deletexid_full, recentxmin_full))
+			return true;
+	}
+	return false;
 }
 
 bytea *
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 4270226eee..bf754ea6d0 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -595,7 +595,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
 	ItemId		iid;
 	IndexTuple	idxtuple;
 	XLogRecPtr	recptr;
-	TransactionId txid;
+	FullTransactionId txid;
 
 	/*
 	 * Check that the leaf is still empty and deletable.
@@ -648,14 +648,13 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
 	 * currently in progress must have ended. (That's much more conservative
 	 * than needed, but let's keep it safe and simple.)
 	 */
-	txid = ReadNewTransactionId();
+	txid = ReadNextFullTransactionId();
 
 	START_CRIT_SECTION();
 
 	/* mark the page as deleted */
 	MarkBufferDirty(leafBuffer);
-	GistPageSetDeleteXid(leafPage, txid);
-	GistPageSetDeleted(leafPage);
+	GistPageSetDeleted(leafPage, txid);
 	stats->stats.pages_deleted++;
 
 	/* remove the downlink from the parent */
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 503db34d86..3b28f54646 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -356,8 +356,7 @@ gistRedoPageDelete(XLogReaderState *record)
 	{
 		Page		page = (Page) BufferGetPage(leafBuffer);
 
-		GistPageSetDeleteXid(page, xldata->deleteXid);
-		GistPageSetDeleted(page);
+		GistPageSetDeleted(page, xldata->deleteXid);
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(leafBuffer);
@@ -396,8 +395,26 @@ gistRedoPageReuse(XLogReaderState *record)
 	 */
 	if (InHotStandby)
 	{
-		ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
-											xlrec->node);
+		FullTransactionId latestRemovedFullXid = xlrec->latestRemovedFullXid;
+		FullTransactionId nextFullXid = ReadNextFullTransactionId();
+		uint64		diff;
+
+		/*
+		 * ResolveRecoveryConflictWithSnapshot operates on 32-bit
+		 * TransactionIds, so truncate the logged FullTransactionId. If the
+		 * logged value is very old, so that XID wrap-around already happened
+		 * on it, there can't be any snapshots that still see it.
+		 */
+		diff = U64FromFullTransactionId(nextFullXid) -
+			U64FromFullTransactionId(latestRemovedFullXid);
+		if (diff < MaxTransactionId / 2)
+		{
+			TransactionId latestRemovedXid;
+
+			latestRemovedXid = XidFromFullTransactionId(latestRemovedFullXid);
+			ResolveRecoveryConflictWithSnapshot(latestRemovedXid,
+												xlrec->node);
+		}
 	}
 }
 
@@ -554,7 +571,7 @@ gistXLogSplit(bool page_is_leaf,
  * downlink from the parent page.
  */
 XLogRecPtr
-gistXLogPageDelete(Buffer buffer, TransactionId xid,
+gistXLogPageDelete(Buffer buffer, FullTransactionId xid,
 				   Buffer parentBuffer, OffsetNumber downlinkOffset)
 {
 	gistxlogPageDelete xlrec;
@@ -578,7 +595,7 @@ gistXLogPageDelete(Buffer buffer, TransactionId xid,
  * Write XLOG record about reuse of a deleted page.
  */
 void
-gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
+gistXLogPageReuse(Relation rel, BlockNumber blkno, FullTransactionId latestRemovedXid)
 {
 	gistxlogPageReuse xlrec_reuse;
 
@@ -591,7 +608,7 @@ gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXi
 	/* XLOG stuff */
 	xlrec_reuse.node = rel->rd_node;
 	xlrec_reuse.block = blkno;
-	xlrec_reuse.latestRemovedXid = latestRemovedXid;
+	xlrec_reuse.latestRemovedFullXid = latestRemovedXid;
 
 	XLogBeginInsert();
 	XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse);
diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c
index 767864b58e..eccb6fd942 100644
--- a/src/backend/access/rmgrdesc/gistdesc.c
+++ b/src/backend/access/rmgrdesc/gistdesc.c
@@ -26,10 +26,11 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
 static void
 out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec)
 {
-	appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u",
+	appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u",
 					 xlrec->node.spcNode, xlrec->node.dbNode,
 					 xlrec->node.relNode, xlrec->block,
-					 xlrec->latestRemovedXid);
+					 EpochFromFullTransactionId(xlrec->latestRemovedFullXid),
+					 XidFromFullTransactionId(xlrec->latestRemovedFullXid));
 }
 
 static void
@@ -50,8 +51,10 @@ out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
 static void
 out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec)
 {
-	appendStringInfo(buf, "deleteXid %u; downlink %u",
-					 xlrec->deleteXid, xlrec->downlinkOffset);
+	appendStringInfo(buf, "deleteXid %u:%u; downlink %u",
+					 EpochFromFullTransactionId(xlrec->deleteXid),
+					 XidFromFullTransactionId(xlrec->deleteXid),
+					 xlrec->downlinkOffset);
 }
 
 void
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 6690d78137..40fe6ed3d3 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -956,6 +956,36 @@ xmin_cmp(const pairingheap_node *a, const pairingheap_node *b, void *arg)
 		return 0;
 }
 
+/*
+ * Get current RecentGlobalXmin value, as a FullTransactionId.
+ */
+FullTransactionId
+GetFullRecentGlobalXmin(void)
+{
+	FullTransactionId nextxid_full;
+	uint32		nextxid_epoch;
+	TransactionId nextxid_xid;
+	uint32		epoch;
+
+	Assert(TransactionIdIsNormal(RecentGlobalXmin));
+
+	/*
+	 * Compute the epoch from the next XID's epoch. This relies on the fact
+	 * that RecentGlobalXmin must be within the 2 billion XID horizon from the
+	 * next XID.
+	 */
+	nextxid_full = ReadNextFullTransactionId();
+	nextxid_epoch = EpochFromFullTransactionId(nextxid_full);
+	nextxid_xid = XidFromFullTransactionId(nextxid_full);
+
+	if (RecentGlobalXmin > nextxid_xid)
+		epoch = nextxid_epoch - 1;
+	else
+		epoch = nextxid_epoch;
+
+	return FullTransactionIdFromEpochAndXid(epoch, RecentGlobalXmin);
+}
+
 /*
  * SnapshotResetXmin
  *
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 6902f4115b..8292956cc0 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -16,6 +16,7 @@
 #ifndef GIST_H
 #define GIST_H
 
+#include "access/transam.h"
 #include "access/xlog.h"
 #include "access/xlogdefs.h"
 #include "storage/block.h"
@@ -140,8 +141,6 @@ typedef struct GISTENTRY
 #define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page))
 
 #define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED)
-#define GistPageSetDeleted(page) ( GistPageGetOpaque(page)->flags |= F_DELETED)
-#define GistPageSetNonDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_DELETED)
 
 #define GistTuplesDeleted(page) ( GistPageGetOpaque(page)->flags & F_TUPLES_DELETED)
 #define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= F_TUPLES_DELETED)
@@ -158,9 +157,54 @@ typedef struct GISTENTRY
 #define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn))
 #define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val))
 
-/* For deleted pages we store last xid which could see the page in scan */
-#define GistPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
-#define GistPageSetDeleteXid(page, val) ( ((PageHeader) (page))->pd_prune_xid = val)
+
+/*
+ * On a deleted page, we store this struct. A deleted page doesn't contain any
+ * tuples, so we don't use the normal page layout with line pointers. Instead,
+ * this struct is stored right after the standard page header. pd_lower points
+ * to the end of this struct. If we add fields to this struct in the future, we
+ * can distinguish the old and new formats by pd_lower.
+ */
+typedef struct GISTDeletedPageContents
+{
+	/* last xid which could see the page in a scan */
+	FullTransactionId deleteXid;
+} GISTDeletedPageContents;
+
+/*
+ * Mark an empty page as deleted: set F_DELETED, move pd_lower past a
+ * GISTDeletedPageContents struct, and store 'deletexid' in that struct.
+ */
+static inline void
+GistPageSetDeleted(Page page, FullTransactionId deletexid)
+{
+	Assert(PageIsEmpty(page));
+
+	GistPageGetOpaque(page)->flags |= F_DELETED;
+	((PageHeader) page)->pd_lower = MAXALIGN(SizeOfPageHeaderData) + sizeof(GISTDeletedPageContents);
+
+	((GISTDeletedPageContents *) PageGetContents(page))->deleteXid = deletexid;
+}
+
+/*
+ * Fetch the deleteXid stored on a deleted page. If pd_lower shows that the
+ * page is too short to hold the field, fall back to the first normal XID
+ * of epoch 0.
+ */
+static inline FullTransactionId
+GistPageGetDeleteXid(Page page)
+{
+	Assert(GistPageIsDeleted(page));
+
+	/* Is the deleteXid field present? */
+	if (((PageHeader) page)->pd_lower >= MAXALIGN(SizeOfPageHeaderData) +
+		offsetof(GISTDeletedPageContents, deleteXid) + sizeof(FullTransactionId))
+	{
+		return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid;
+	}
+	else
+		return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
+}
 
 /*
  * Vector of GISTENTRY structs; user-defined methods union and picksplit
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 9e3958398e..0488d01c9b 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -426,11 +426,11 @@ extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
 
 /* gistxlog.c */
 extern XLogRecPtr gistXLogPageDelete(Buffer buffer,
-									 TransactionId xid, Buffer parentBuffer,
+									 FullTransactionId xid, Buffer parentBuffer,
 									 OffsetNumber downlinkOffset);
 
 extern void gistXLogPageReuse(Relation rel, BlockNumber blkno,
-							  TransactionId latestRemovedXid);
+							  FullTransactionId latestRemovedXid);
 
 extern XLogRecPtr gistXLogUpdate(Buffer buffer,
 								 OffsetNumber *todelete, int ntodelete,
diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h
index 969a5376b5..e44922d915 100644
--- a/src/include/access/gistxlog.h
+++ b/src/include/access/gistxlog.h
@@ -83,7 +83,7 @@ typedef struct gistxlogPageSplit
  */
 typedef struct gistxlogPageDelete
 {
-	TransactionId deleteXid; /* last Xid which could see page in scan */
+	FullTransactionId deleteXid; /* last Xid which could see page in scan */
 	OffsetNumber downlinkOffset; /* Offset of downlink referencing this
 									 * page */
 } gistxlogPageDelete;
@@ -98,10 +98,10 @@ typedef struct gistxlogPageReuse
 {
 	RelFileNode node;
 	BlockNumber block;
-	TransactionId latestRemovedXid;
+	FullTransactionId latestRemovedFullXid;
 } gistxlogPageReuse;
 
-#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedXid) + sizeof(TransactionId))
+#define SizeOfGistxlogPageReuse (offsetof(gistxlogPageReuse, latestRemovedFullXid) + sizeof(FullTransactionId))
 
 extern void gist_redo(XLogReaderState *record);
 extern void gist_desc(StringInfo buf, XLogReaderState *record);
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 58ae3b0c7a..6641ee510a 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -13,6 +13,7 @@
 #ifndef SNAPMGR_H
 #define SNAPMGR_H
 
+#include "access/transam.h"
 #include "fmgr.h"
 #include "utils/relcache.h"
 #include "utils/resowner.h"
@@ -122,6 +123,8 @@ extern void UnregisterSnapshot(Snapshot snapshot);
 extern Snapshot RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner);
 extern void UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner);
 
+extern FullTransactionId GetFullRecentGlobalXmin(void);
+
 extern void AtSubCommit_Snapshot(int level);
 extern void AtSubAbort_Snapshot(int level);
 extern void AtEOXact_Snapshot(bool isCommit, bool resetXmin);