Fix undercounting in VACUUM VERBOSE output.

The logic for determining how many nbtree pages in an index are deleted
pages sometimes undercounted pages.  Pages that were deleted by the
current VACUUM operation (as opposed to some previous VACUUM operation
whose deleted pages have yet to be reused) were sometimes overlooked.
The final count is exposed to users through VACUUM VERBOSE's "%u index
pages have been deleted" output.

btvacuumpage() avoided double-counting when _bt_pagedel() deleted more
than one page by assuming that only one page was deleted, and that the
additional deleted pages would get picked up during a future call to
btvacuumpage() by the same VACUUM operation.  _bt_pagedel() can
legitimately delete pages that the btvacuumscan() scan will not visit
again, though, so that assumption was slightly faulty.

Fix the accounting by teaching _bt_pagedel() about its caller's
requirements.  It now only reports on pages that it knows btvacuumscan()
won't visit again (including the current btvacuumpage() page), so
everything works out in the end.

This bug has been around forever.  Only backpatch to v11, though, to
keep _bt_pagedel() in sync on the branches that have today's bugfix
commit b0229f26da.  Note that this commit changes the signature of
_bt_pagedel(), just like commit b0229f26da.

Author: Peter Geoghegan
Reviewed-By: Masahiko Sawada
Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com
Backpatch: 11-
This commit is contained in:
Peter Geoghegan 2020-05-01 09:51:09 -07:00
parent b0229f26da
commit 73a076b03f
3 changed files with 38 additions and 18 deletions

View File

@ -38,8 +38,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
BlockNumber scanblkno,
bool *rightsib_empty,
TransactionId *oldestBtpoXact);
TransactionId *oldestBtpoXact,
uint32 *ndeleted);
static TransactionId _bt_xid_horizon(Relation rel, Relation heapRel, Page page,
OffsetNumber *deletable, int ndeletable);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
@ -1489,7 +1491,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
*
* Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages
* were deleted too).
* were deleted too). Note that this does not include pages that we delete
* that the btvacuumscan scan has yet to reach; they'll get counted later
* instead.
*
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
* responsible for maintaining *oldestBtpoXact in the case of pages that were
@ -1499,15 +1503,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
* carefully, it's better to run it in a temp context that can be reset
* frequently.
*/
int
uint32
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
{
int ndeleted = 0;
uint32 ndeleted = 0;
BlockNumber rightsib;
bool rightsib_empty;
Page page;
BTPageOpaque opaque;
/*
* Save original leafbuf block number from caller. Only deleted blocks
* that are <= scanblkno get counted in ndeleted return value.
*/
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
/*
* "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid
@ -1558,8 +1568,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
if (P_ISDELETED(opaque))
ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg_internal("found deleted block %u while following right link in index \"%s\"",
errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
BufferGetBlockNumber(leafbuf),
scanblkno,
RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf);
@ -1709,13 +1720,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
while (P_ISHALFDEAD(opaque))
{
/* Check for interrupts in _bt_unlink_halfdead_page */
if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
oldestBtpoXact))
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
&rightsib_empty, oldestBtpoXact,
&ndeleted))
{
/* _bt_unlink_halfdead_page failed, released buffer */
return ndeleted;
}
ndeleted++;
}
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@ -1974,8 +1985,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
* to avoid having to reacquire a lock we already released).
*/
static bool
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
TransactionId *oldestBtpoXact)
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
bool *rightsib_empty, TransactionId *oldestBtpoXact,
uint32 *ndeleted)
{
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib;
@ -2370,6 +2382,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
*oldestBtpoXact = opaque->btpo.xact;
/*
* If btvacuumscan won't revisit this page in a future btvacuumpage call
* and count it as deleted then, we count it as deleted by the current
* btvacuumpage call.
*/
if (target <= scanblkno)
(*ndeleted)++;
/*
* Release the target, if it was not the leaf block. The leaf is always
* kept locked.

View File

@ -1362,17 +1362,17 @@ restart:
if (delete_now)
{
MemoryContext oldcontext;
int ndel;
/* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset(vstate->pagedelcontext);
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
/* count only this page, else may double-count parent */
if (ndel)
stats->pages_deleted++;
/*
* We trust the _bt_pagedel return value because it does not include
* any page that a future call here from btvacuumscan is expected to
* count. There will be no double-counting.
*/
stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */

View File

@ -1080,8 +1080,8 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
extern void _bt_delitems_delete(Relation rel, Buffer buf,
OffsetNumber *deletable, int ndeletable,
Relation heapRel);
extern int _bt_pagedel(Relation rel, Buffer leafbuf,
TransactionId *oldestBtpoXact);
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
TransactionId *oldestBtpoXact);
/*
* prototypes for functions in nbtsearch.c