VACUUM VERBOSE: Count "newly deleted" index pages.

Teach VACUUM VERBOSE to report on pages deleted by the _current_ VACUUM
operation -- these are newly deleted pages.  VACUUM VERBOSE continues to
report on the total number of deleted pages in the entire index (no
change there).  The former is a subset of the latter.

The distinction between each category of deleted index page only arises
with index AMs where page deletion is supported and is decoupled from
page recycling for performance reasons.

This is follow-up work to commit e5d8a999, which made nbtree store
64-bit XIDs (not 32-bit XIDs) in pages at the point at which they're
deleted.  Note that the btm_last_cleanup_num_delpages metapage field
added by that commit usually gets set to pages_newly_deleted.  The
exceptions (the scenarios in which they're not equal) all seem to be
tricky cases for the implementation (of page deletion and recycling) in
general.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-WznpdHvujGUwYZ8sihX%3Dd5u-tRYhi-F4wnV2uN2zHpMUXw%40mail.gmail.com
This commit is contained in:
Peter Geoghegan 2021-02-25 14:32:18 -08:00
parent 301ed8812e
commit 2376361839
8 changed files with 86 additions and 54 deletions

View File

@ -231,6 +231,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
END_CRIT_SECTION(); END_CRIT_SECTION();
gvs->result->pages_newly_deleted++;
gvs->result->pages_deleted++; gvs->result->pages_deleted++;
} }

View File

@ -133,9 +133,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
MemoryContext oldctx; MemoryContext oldctx;
/* /*
* Reset counts that will be incremented during the scan; needed in case * Reset fields that track information about the entire index now. This
* of multiple scans during a single VACUUM command. * avoids double-counting in the case where a single VACUUM command
* requires multiple scans of the index.
*
* Avoid resetting the tuples_removed and pages_newly_deleted fields here,
* since they track information about the VACUUM command, and so must last
* across each call to gistvacuumscan().
*
* (Note that pages_free is treated as state about the whole index, not
* the current VACUUM. This is appropriate because RecordFreeIndexPage()
* calls are idempotent, and get repeated for the same deleted pages in
* some scenarios. The point for us is to track the number of recyclable
* pages in the index at the end of the VACUUM command.)
*/ */
stats->num_pages = 0;
stats->estimated_count = false; stats->estimated_count = false;
stats->num_index_tuples = 0; stats->num_index_tuples = 0;
stats->pages_deleted = 0; stats->pages_deleted = 0;
@ -281,8 +293,8 @@ restart:
{ {
/* Okay to recycle this page */ /* Okay to recycle this page */
RecordFreeIndexPage(rel, blkno); RecordFreeIndexPage(rel, blkno);
vstate->stats->pages_free++;
vstate->stats->pages_deleted++; vstate->stats->pages_deleted++;
vstate->stats->pages_free++;
} }
else if (GistPageIsDeleted(page)) else if (GistPageIsDeleted(page))
{ {
@ -636,6 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
/* mark the page as deleted */ /* mark the page as deleted */
MarkBufferDirty(leafBuffer); MarkBufferDirty(leafBuffer);
GistPageSetDeleted(leafPage, txid); GistPageSetDeleted(leafPage, txid);
stats->pages_newly_deleted++;
stats->pages_deleted++; stats->pages_deleted++;
/* remove the downlink from the parent */ /* remove the downlink from the parent */

View File

@ -2521,9 +2521,11 @@ lazy_cleanup_index(Relation indrel,
(*stats)->num_index_tuples, (*stats)->num_index_tuples,
(*stats)->num_pages), (*stats)->num_pages),
errdetail("%.0f index row versions were removed.\n" errdetail("%.0f index row versions were removed.\n"
"%u index pages have been deleted, %u are currently reusable.\n" "%u index pages were newly deleted.\n"
"%u index pages are currently deleted, of which %u are currently reusable.\n"
"%s.", "%s.",
(*stats)->tuples_removed, (*stats)->tuples_removed,
(*stats)->pages_newly_deleted,
(*stats)->pages_deleted, (*stats)->pages_free, (*stats)->pages_deleted, (*stats)->pages_free,
pg_rusage_show(&ru0)))); pg_rusage_show(&ru0))));
} }

View File

@ -50,7 +50,7 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
BlockNumber scanblkno, BlockNumber scanblkno,
bool *rightsib_empty, bool *rightsib_empty,
uint32 *ndeleted); BTVacState *vstate);
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child, static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child,
BTStack stack, BTStack stack,
Buffer *subtreeparent, Buffer *subtreeparent,
@ -1760,20 +1760,22 @@ _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
* should never pass a buffer containing an existing deleted page here. The * should never pass a buffer containing an existing deleted page here. The
* lock and pin on caller's buffer will be dropped before we return. * lock and pin on caller's buffer will be dropped before we return.
* *
* Returns the number of pages successfully deleted (zero if page cannot * Maintains bulk delete stats for caller, which are taken from vstate. We
* be deleted now; could be more than one if parent or right sibling pages * need to cooperate closely with caller here so that whole VACUUM operation
* were deleted too). Note that this does not include pages that we delete * reliably avoids any double counting of subsidiary-to-leafbuf pages that we
* that the btvacuumscan scan has yet to reach; they'll get counted later * delete in passing. If such pages happen to be from a block number that is
* instead. * ahead of the current scanblkno position, then caller is expected to count
* them directly later on. It's simpler for us to understand caller's
* requirements than it would be for caller to understand when or how a
* deleted page became deleted after the fact.
* *
* NOTE: this leaks memory. Rather than trying to clean up everything * NOTE: this leaks memory. Rather than trying to clean up everything
* carefully, it's better to run it in a temp context that can be reset * carefully, it's better to run it in a temp context that can be reset
* frequently. * frequently.
*/ */
uint32 void
_bt_pagedel(Relation rel, Buffer leafbuf) _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
{ {
uint32 ndeleted = 0;
BlockNumber rightsib; BlockNumber rightsib;
bool rightsib_empty; bool rightsib_empty;
Page page; Page page;
@ -1781,7 +1783,8 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
/* /*
* Save original leafbuf block number from caller. Only deleted blocks * Save original leafbuf block number from caller. Only deleted blocks
* that are <= scanblkno get counted in ndeleted return value. * that are <= scanblkno are added to bulk delete stat's pages_deleted
* count.
*/ */
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf); BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
@ -1843,7 +1846,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
RelationGetRelationName(rel)))); RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf); _bt_relbuf(rel, leafbuf);
return ndeleted; return;
} }
/* /*
@ -1873,7 +1876,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
Assert(!P_ISHALFDEAD(opaque)); Assert(!P_ISHALFDEAD(opaque));
_bt_relbuf(rel, leafbuf); _bt_relbuf(rel, leafbuf);
return ndeleted; return;
} }
/* /*
@ -1922,8 +1925,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
if (_bt_leftsib_splitflag(rel, leftsib, leafblkno)) if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
{ {
ReleaseBuffer(leafbuf); ReleaseBuffer(leafbuf);
Assert(ndeleted == 0); return;
return ndeleted;
} }
/* we need an insertion scan key for the search, so build one */ /* we need an insertion scan key for the search, so build one */
@ -1964,7 +1966,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
if (!_bt_mark_page_halfdead(rel, leafbuf, stack)) if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
{ {
_bt_relbuf(rel, leafbuf); _bt_relbuf(rel, leafbuf);
return ndeleted; return;
} }
} }
@ -1979,7 +1981,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
{ {
/* Check for interrupts in _bt_unlink_halfdead_page */ /* Check for interrupts in _bt_unlink_halfdead_page */
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno, if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
&rightsib_empty, &ndeleted)) &rightsib_empty, vstate))
{ {
/* /*
* _bt_unlink_halfdead_page should never fail, since we * _bt_unlink_halfdead_page should never fail, since we
@ -1990,7 +1992,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
* lock and pin on leafbuf for us. * lock and pin on leafbuf for us.
*/ */
Assert(false); Assert(false);
return ndeleted; return;
} }
} }
@ -2026,8 +2028,6 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE); leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
} }
return ndeleted;
} }
/* /*
@ -2262,9 +2262,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
*/ */
static bool static bool
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno, _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
bool *rightsib_empty, uint32 *ndeleted) bool *rightsib_empty, BTVacState *vstate)
{ {
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf); BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
IndexBulkDeleteResult *stats = vstate->stats;
BlockNumber leafleftsib; BlockNumber leafleftsib;
BlockNumber leafrightsib; BlockNumber leafrightsib;
BlockNumber target; BlockNumber target;
@ -2674,12 +2675,17 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
_bt_relbuf(rel, buf); _bt_relbuf(rel, buf);
/* /*
* If btvacuumscan won't revisit this page in a future btvacuumpage call * Maintain pages_newly_deleted, which is simply the number of pages
* and count it as deleted then, we count it as deleted by current * deleted by the ongoing VACUUM operation.
* btvacuumpage call *
* Maintain pages_deleted in a way that takes into account how
* btvacuumpage() will count deleted pages that have yet to become
* scanblkno -- only count page when it's not going to get that treatment
* later on.
*/ */
stats->pages_newly_deleted++;
if (target <= scanblkno) if (target <= scanblkno)
(*ndeleted)++; stats->pages_deleted++;
return true; return true;
} }

View File

@ -38,17 +38,6 @@
#include "utils/memutils.h" #include "utils/memutils.h"
/* Working state needed by btvacuumpage */
typedef struct
{
IndexVacuumInfo *info;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
MemoryContext pagedelcontext;
} BTVacState;
/* /*
* BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started. * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
* *
@ -1016,9 +1005,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
* avoids double-counting in the case where a single VACUUM command * avoids double-counting in the case where a single VACUUM command
* requires multiple scans of the index. * requires multiple scans of the index.
* *
* Avoid resetting the tuples_removed field here, since it tracks * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
* information about the VACUUM command, and so must last across each call * since they track information about the VACUUM command, and so must last
* to btvacuumscan(). * across each call to btvacuumscan().
* *
* (Note that pages_free is treated as state about the whole index, not * (Note that pages_free is treated as state about the whole index, not
* the current VACUUM. This is appropriate because RecordFreeIndexPage() * the current VACUUM. This is appropriate because RecordFreeIndexPage()
@ -1237,11 +1226,13 @@ backtrack:
} }
else if (P_ISHALFDEAD(opaque)) else if (P_ISHALFDEAD(opaque))
{ {
/* /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
* Half-dead leaf page. Try to delete now. Might update
* pages_deleted below.
*/
attempt_pagedel = true; attempt_pagedel = true;
/*
* _bt_pagedel() will increment both pages_newly_deleted and
* pages_deleted stats in all cases (barring corruption)
*/
} }
else if (P_ISLEAF(opaque)) else if (P_ISLEAF(opaque))
{ {
@ -1451,12 +1442,12 @@ backtrack:
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext); oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
/* /*
* We trust the _bt_pagedel return value because it does not include * _bt_pagedel maintains the bulk delete stats on our behalf;
* any page that a future call here from btvacuumscan is expected to * pages_newly_deleted and pages_deleted are likely to be incremented
* count. There will be no double-counting. * during call
*/ */
Assert(blkno == scanblkno); Assert(blkno == scanblkno);
stats->pages_deleted += _bt_pagedel(rel, buf); _bt_pagedel(rel, buf, vstate);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */ /* pagedel released buffer, so we shouldn't */

View File

@ -891,6 +891,7 @@ spgvacuumscan(spgBulkDeleteState *bds)
/* Report final stats */ /* Report final stats */
bds->stats->num_pages = num_pages; bds->stats->num_pages = num_pages;
bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
bds->stats->pages_free = bds->stats->pages_deleted; bds->stats->pages_free = bds->stats->pages_deleted;
} }

View File

@ -63,8 +63,11 @@ typedef struct IndexVacuumInfo
* of which this is just the first field; this provides a way for ambulkdelete * of which this is just the first field; this provides a way for ambulkdelete
* to communicate additional private data to amvacuumcleanup. * to communicate additional private data to amvacuumcleanup.
* *
* Note: pages_deleted and pages_free refer to free space within the index * Note: pages_newly_deleted is the number of pages in the index that were
* file. Some index AMs may compute num_index_tuples by reference to * deleted by the current vacuum operation. pages_deleted and pages_free
* refer to free space within the index file.
*
* Note: Some index AMs may compute num_index_tuples by reference to
* num_heap_tuples, in which case they should copy the estimated_count field * num_heap_tuples, in which case they should copy the estimated_count field
* from IndexVacuumInfo. * from IndexVacuumInfo.
*/ */
@ -74,7 +77,8 @@ typedef struct IndexBulkDeleteResult
bool estimated_count; /* num_index_tuples is an estimate */ bool estimated_count; /* num_index_tuples is an estimate */
double num_index_tuples; /* tuples remaining */ double num_index_tuples; /* tuples remaining */
double tuples_removed; /* # removed during vacuum operation */ double tuples_removed; /* # removed during vacuum operation */
BlockNumber pages_deleted; /* # unused pages in index */ BlockNumber pages_newly_deleted; /* # pages marked deleted by us */
BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */
BlockNumber pages_free; /* # pages available for reuse */ BlockNumber pages_free; /* # pages available for reuse */
} IndexBulkDeleteResult; } IndexBulkDeleteResult;

View File

@ -312,6 +312,20 @@ BTPageIsRecyclable(Page page)
return false; return false;
} }
/*
* BTVacState is private nbtree.c state used during VACUUM. It is exported
* for use by page deletion related code in nbtpage.c.
*/
typedef struct BTVacState
{
IndexVacuumInfo *info;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
MemoryContext pagedelcontext;
} BTVacState;
/* /*
* Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
* page. The high key is not a tuple that is used to visit the heap. It is * page. The high key is not a tuple that is used to visit the heap. It is
@ -1181,7 +1195,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
extern void _bt_delitems_delete_check(Relation rel, Buffer buf, extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
Relation heapRel, Relation heapRel,
TM_IndexDeleteOp *delstate); TM_IndexDeleteOp *delstate);
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf); extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
/* /*
* prototypes for functions in nbtsearch.c * prototypes for functions in nbtsearch.c