diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 6f19bd5bc3..db8fa54375 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -35,7 +35,8 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack); static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, - bool *rightsib_empty, TransactionId *oldestBtpoXact); + BlockNumber scanblkno, bool *rightsib_empty, + TransactionId *oldestBtpoXact, uint32 *ndeleted); static bool _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, Buffer *topparent, OffsetNumber *topoff, BlockNumber *target, BlockNumber *rightsib); @@ -1236,7 +1237,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, * * Returns the number of pages successfully deleted (zero if page cannot * be deleted now; could be more than one if parent or right sibling pages - * were deleted too). + * were deleted too). Note that this does not include pages that we delete + * that the btvacuumscan scan has yet to reach; they'll get counted later + * instead. * * Maintains *oldestBtpoXact for any pages that get deleted. Caller is * responsible for maintaining *oldestBtpoXact in the case of pages that were @@ -1246,15 +1249,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, * carefully, it's better to run it in a temp context that can be reset * frequently. */ -int +uint32 _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) { - int ndeleted = 0; + uint32 ndeleted = 0; BlockNumber rightsib; bool rightsib_empty; Page page; BTPageOpaque opaque; + /* + * Save original leafbuf block number from caller. Only deleted blocks + * that are <= scanblkno get counted in ndeleted return value. + */ + BlockNumber scanblkno = BufferGetBlockNumber(leafbuf); + /* * "stack" is a search stack leading (approximately) to the target page. 
* It is initially NULL, but when iterating, we keep it to avoid @@ -1305,8 +1314,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) if (P_ISDELETED(opaque)) ereport(LOG, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg_internal("found deleted block %u while following right link in index \"%s\"", + errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"", BufferGetBlockNumber(leafbuf), + scanblkno, RelationGetRelationName(rel)))); _bt_relbuf(rel, leafbuf); @@ -1456,13 +1466,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact) while (P_ISHALFDEAD(opaque)) { /* Check for interrupts in _bt_unlink_halfdead_page */ - if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty, - oldestBtpoXact)) + if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno, + &rightsib_empty, oldestBtpoXact, + &ndeleted)) { /* _bt_unlink_halfdead_page failed, released buffer */ return ndeleted; } - ndeleted++; } Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque)); @@ -1704,8 +1714,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack) * to avoid having to reacquire a lock we already released). 
*/ static bool -_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty, - TransactionId *oldestBtpoXact) +_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, + bool *rightsib_empty, TransactionId *oldestBtpoXact, + uint32 *ndeleted) { BlockNumber leafblkno = BufferGetBlockNumber(leafbuf); BlockNumber leafleftsib; @@ -2089,6 +2100,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty, TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact)) *oldestBtpoXact = opaque->btpo.xact; + /* + * If btvacuumscan won't revisit this page in a future btvacuumpage call + * and count it as deleted then, we count it as deleted by the current + * btvacuumpage call. + */ + if (target <= scanblkno) + (*ndeleted)++; + /* * Release the target, if it was not the leaf block. The leaf is always * kept locked. diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 474ad251fe..2542bf2220 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1340,17 +1340,17 @@ restart: if (delete_now) { MemoryContext oldcontext; - int ndel; /* Run pagedel in a temp context to avoid memory leakage */ MemoryContextReset(vstate->pagedelcontext); oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); - ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact); - - /* count only this page, else may double-count parent */ - if (ndel) - stats->pages_deleted++; + /* + * We trust the _bt_pagedel return value because it does not include + * any page that a future call here from btvacuumscan is expected to + * count. There will be no double-counting. 
+ */ + stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact); MemoryContextSwitchTo(oldcontext); /* pagedel released buffer, so we shouldn't */ diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 76413e09dd..516f697b4c 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -553,8 +553,8 @@ extern void _bt_delitems_delete(Relation rel, Buffer buf, extern void _bt_delitems_vacuum(Relation rel, Buffer buf, OffsetNumber *itemnos, int nitems, BlockNumber lastBlockVacuumed); -extern int _bt_pagedel(Relation rel, Buffer leafbuf, - TransactionId *oldestBtpoXact); +extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf, + TransactionId *oldestBtpoXact); /* * prototypes for functions in nbtsearch.c