diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 73d28d37a3..f05cbe7467 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -961,20 +961,15 @@ _bt_page_recyclable(Page page) } /* - * Delete item(s) from a btree page during VACUUM. - * - * This must only be used for deleting leaf items. Deleting an item on a - * non-leaf page has to be done as part of an atomic action that includes - * deleting the page it points to. + * Delete item(s) from a btree leaf page during VACUUM. * * This routine assumes that the caller has a super-exclusive write lock on * the buffer. Also, the given deletable array *must* be sorted in ascending * order. * * We record VACUUMs and b-tree deletes differently in WAL. Deletes must - * generate recovery conflicts by accessing the heap inline, whereas VACUUMs - * can rely on the initial heap scan taking care of the problem (pruning would - * have generated the conflicts needed for hot standby already). + * generate their own latestRemovedXid by accessing the heap directly, whereas + * VACUUMs rely on the initial heap scan taking care of it indirectly. */ void _bt_delitems_vacuum(Relation rel, Buffer buf, @@ -1030,9 +1025,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum); /* - * The target-offsets array is not in the buffer, but pretend that it - * is. When XLogInsert stores the whole buffer, the offsets array - * need not be stored too. + * The deletable array is not in the buffer, but pretend that it is. + * When XLogInsert stores the whole buffer, the array need not be + * stored too. */ XLogRegisterBufData(0, (char *) deletable, ndeletable * sizeof(OffsetNumber)); @@ -1046,21 +1041,19 @@ _bt_delitems_vacuum(Relation rel, Buffer buf, } /* - * Delete item(s) from a btree page during single-page cleanup. - * - * As above, must only be used on leaf pages. + * Delete item(s) from a btree leaf page during single-page cleanup. * * This routine assumes that the caller has pinned and write locked the - * buffer. Also, the given itemnos *must* appear in increasing order in the - * array. + * buffer. Also, the given deletable array *must* be sorted in ascending + * order. * * This is nearly the same as _bt_delitems_vacuum as far as what it does to - * the page, but it needs to generate its own recovery conflicts by accessing - * the heap. See comments for _bt_delitems_vacuum. + * the page, but it needs to generate its own latestRemovedXid by accessing + * the heap. This is used by the REDO routine to generate recovery conflicts. */ void _bt_delitems_delete(Relation rel, Buffer buf, - OffsetNumber *itemnos, int nitems, + OffsetNumber *deletable, int ndeletable, Relation heapRel) { Page page = BufferGetPage(buf); @@ -1068,18 +1061,18 @@ _bt_delitems_delete(Relation rel, Buffer buf, TransactionId latestRemovedXid = InvalidTransactionId; /* Shouldn't be called unless there's something to do */ - Assert(nitems > 0); + Assert(ndeletable > 0); if (XLogStandbyInfoActive() && RelationNeedsWAL(rel)) latestRemovedXid = index_compute_xid_horizon_for_tuples(rel, heapRel, buf, - itemnos, nitems); + deletable, ndeletable); /* No ereport(ERROR) until changes are logged */ START_CRIT_SECTION(); /* Fix the page */ - PageIndexMultiDelete(page, itemnos, nitems); + PageIndexMultiDelete(page, deletable, ndeletable); /* * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID, @@ -1098,18 +1091,19 @@ _bt_delitems_delete(Relation rel, Buffer buf, xl_btree_delete xlrec_delete; xlrec_delete.latestRemovedXid = latestRemovedXid; - xlrec_delete.nitems = nitems; + xlrec_delete.ndeleted = ndeletable; XLogBeginInsert(); XLogRegisterBuffer(0, buf, REGBUF_STANDARD); XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete); /* - * We need the target-offsets array whether or not we store the whole - * buffer, to allow us to find the latestRemovedXid on a standby - * server. + * The deletable array is not in the buffer, but pretend that it is. + * When XLogInsert stores the whole buffer, the array need not be + * stored too. */ - XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber)); + XLogRegisterBufData(0, (char *) deletable, + ndeletable * sizeof(OffsetNumber)); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index e1c3749148..2e5202c2d6 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -449,16 +449,11 @@ btree_xlog_delete(XLogReaderState *record) */ if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + char *ptr = XLogRecGetBlockData(record, 0, NULL); + page = (Page) BufferGetPage(buffer); - if (XLogRecGetDataLen(record) > SizeOfBtreeDelete) - { - OffsetNumber *unused; - - unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete); - - PageIndexMultiDelete(page, unused, xlrec->nitems); - } + PageIndexMultiDelete(page, (OffsetNumber *) ptr, xlrec->ndeleted); /* Mark the page as not containing any LP_DEAD items */ opaque = (BTPageOpaque) PageGetSpecialPointer(page); diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index e0ec8a4b0b..7d63a7124e 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -53,8 +53,8 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_delete *xlrec = (xl_btree_delete *) rec; - appendStringInfo(buf, "%d items, latest removed xid %u", - xlrec->nitems, xlrec->latestRemovedXid); + appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u", + xlrec->latestRemovedXid, xlrec->ndeleted); break; } case XLOG_BTREE_MARK_PAGE_HALFDEAD: diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 4f84ca83dc..f90ee3a0e0 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -779,7 +779,8 @@ extern bool _bt_page_recyclable(Page page); extern void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *deletable, int ndeletable); extern void _bt_delitems_delete(Relation rel, Buffer buf, - OffsetNumber *itemnos, int nitems, Relation heapRel); + OffsetNumber *deletable, int ndeletable, + Relation heapRel); extern int _bt_pagedel(Relation rel, Buffer buf); /* diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index 3da5514655..776a9bd723 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -126,12 +126,12 @@ typedef struct xl_btree_split typedef struct xl_btree_delete { TransactionId latestRemovedXid; - int nitems; + uint32 ndeleted; - /* TARGET OFFSET NUMBERS FOLLOW AT THE END */ + /* DELETED TARGET OFFSET NUMBERS FOLLOW */ } xl_btree_delete; -#define SizeOfBtreeDelete (offsetof(xl_btree_delete, nitems) + sizeof(int)) +#define SizeOfBtreeDelete (offsetof(xl_btree_delete, ndeleted) + sizeof(uint32)) /* * This is what we need to know about page reuse within btree. This record diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 0a836d1c92..087918d41d 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD103 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD104 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData {