VACUUM VERBOSE: Count "newly deleted" index pages.

Teach VACUUM VERBOSE to report on pages deleted by the _current_ VACUUM
operation -- these are newly deleted pages.  VACUUM VERBOSE continues to
report on the total number of deleted pages in the entire index (no
change there).  The former is a subset of the latter.

The distinction between each category of deleted index page only arises
with index AMs where page deletion is supported and is decoupled from
page recycling for performance reasons.

This is follow-up work to commit e5d8a999, which made nbtree store
64-bit XIDs (not 32-bit XIDs) in pages at the point at which they're
deleted.  Note that the btm_last_cleanup_num_delpages metapage field
added by that commit usually gets set to pages_newly_deleted.  The
exceptions (the scenarios in which they're not equal) all seem to be
tricky cases for the implementation (of page deletion and recycling) in
general.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-WznpdHvujGUwYZ8sihX%3Dd5u-tRYhi-F4wnV2uN2zHpMUXw%40mail.gmail.com
This commit is contained in:
Peter Geoghegan 2021-02-25 14:32:18 -08:00
parent 301ed8812e
commit 2376361839
8 changed files with 86 additions and 54 deletions

View File

@ -231,6 +231,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
END_CRIT_SECTION();
gvs->result->pages_newly_deleted++;
gvs->result->pages_deleted++;
}

View File

@ -133,9 +133,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
MemoryContext oldctx;
/*
* Reset counts that will be incremented during the scan; needed in case
* of multiple scans during a single VACUUM command.
* Reset fields that track information about the entire index now. This
* avoids double-counting in the case where a single VACUUM command
* requires multiple scans of the index.
*
* Avoid resetting the tuples_removed and pages_newly_deleted fields here,
* since they track information about the VACUUM command, and so must last
* across each call to gistvacuumscan().
*
* (Note that pages_free is treated as state about the whole index, not
* the current VACUUM. This is appropriate because RecordFreeIndexPage()
* calls are idempotent, and get repeated for the same deleted pages in
* some scenarios. The point for us is to track the number of recyclable
* pages in the index at the end of the VACUUM command.)
*/
stats->num_pages = 0;
stats->estimated_count = false;
stats->num_index_tuples = 0;
stats->pages_deleted = 0;
@ -281,8 +293,8 @@ restart:
{
/* Okay to recycle this page */
RecordFreeIndexPage(rel, blkno);
vstate->stats->pages_free++;
vstate->stats->pages_deleted++;
vstate->stats->pages_free++;
}
else if (GistPageIsDeleted(page))
{
@ -636,6 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
/* mark the page as deleted */
MarkBufferDirty(leafBuffer);
GistPageSetDeleted(leafPage, txid);
stats->pages_newly_deleted++;
stats->pages_deleted++;
/* remove the downlink from the parent */

View File

@ -2521,9 +2521,11 @@ lazy_cleanup_index(Relation indrel,
(*stats)->num_index_tuples,
(*stats)->num_pages),
errdetail("%.0f index row versions were removed.\n"
"%u index pages have been deleted, %u are currently reusable.\n"
"%u index pages were newly deleted.\n"
"%u index pages are currently deleted, of which %u are currently reusable.\n"
"%s.",
(*stats)->tuples_removed,
(*stats)->pages_newly_deleted,
(*stats)->pages_deleted, (*stats)->pages_free,
pg_rusage_show(&ru0))));
}

View File

@ -50,7 +50,7 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
BlockNumber scanblkno,
bool *rightsib_empty,
uint32 *ndeleted);
BTVacState *vstate);
static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child,
BTStack stack,
Buffer *subtreeparent,
@ -1760,20 +1760,22 @@ _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
* should never pass a buffer containing an existing deleted page here. The
* lock and pin on caller's buffer will be dropped before we return.
*
* Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages
* were deleted too). Note that this does not include pages that we delete
* that the btvacuumscan scan has yet to reach; they'll get counted later
* instead.
* Maintains bulk delete stats for caller, which are taken from vstate. We
* need to cooperate closely with caller here so that whole VACUUM operation
* reliably avoids any double counting of subsidiary-to-leafbuf pages that we
* delete in passing. If such pages happen to be from a block number that is
* ahead of the current scanblkno position, then caller is expected to count
* them directly later on. It's simpler for us to understand caller's
* requirements than it would be for caller to understand when or how a
* deleted page became deleted after the fact.
*
* NOTE: this leaks memory. Rather than trying to clean up everything
* carefully, it's better to run it in a temp context that can be reset
* frequently.
*/
uint32
_bt_pagedel(Relation rel, Buffer leafbuf)
void
_bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
{
uint32 ndeleted = 0;
BlockNumber rightsib;
bool rightsib_empty;
Page page;
@ -1781,7 +1783,8 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
/*
* Save original leafbuf block number from caller. Only deleted blocks
* that are <= scanblkno get counted in ndeleted return value.
* that are <= scanblkno are added to bulk delete stat's pages_deleted
* count.
*/
BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
@ -1843,7 +1846,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf);
return ndeleted;
return;
}
/*
@ -1873,7 +1876,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
Assert(!P_ISHALFDEAD(opaque));
_bt_relbuf(rel, leafbuf);
return ndeleted;
return;
}
/*
@ -1922,8 +1925,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
{
ReleaseBuffer(leafbuf);
Assert(ndeleted == 0);
return ndeleted;
return;
}
/* we need an insertion scan key for the search, so build one */
@ -1964,7 +1966,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
{
_bt_relbuf(rel, leafbuf);
return ndeleted;
return;
}
}
@ -1979,7 +1981,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
{
/* Check for interrupts in _bt_unlink_halfdead_page */
if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
&rightsib_empty, &ndeleted))
&rightsib_empty, vstate))
{
/*
* _bt_unlink_halfdead_page should never fail, since we
@ -1990,7 +1992,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
* lock and pin on leafbuf for us.
*/
Assert(false);
return ndeleted;
return;
}
}
@ -2026,8 +2028,6 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
}
return ndeleted;
}
/*
@ -2262,9 +2262,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
*/
static bool
_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
bool *rightsib_empty, uint32 *ndeleted)
bool *rightsib_empty, BTVacState *vstate)
{
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
IndexBulkDeleteResult *stats = vstate->stats;
BlockNumber leafleftsib;
BlockNumber leafrightsib;
BlockNumber target;
@ -2674,12 +2675,17 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
_bt_relbuf(rel, buf);
/*
* If btvacuumscan won't revisit this page in a future btvacuumpage call
* and count it as deleted then, we count it as deleted by current
* btvacuumpage call
* Maintain pages_newly_deleted, which is simply the number of pages
* deleted by the ongoing VACUUM operation.
*
* Maintain pages_deleted in a way that takes into account how
* btvacuumpage() will count deleted pages that have yet to become
* scanblkno -- only count page when it's not going to get that treatment
* later on.
*/
stats->pages_newly_deleted++;
if (target <= scanblkno)
(*ndeleted)++;
stats->pages_deleted++;
return true;
}

View File

@ -38,17 +38,6 @@
#include "utils/memutils.h"
/* Working state needed by btvacuumpage */
typedef struct
{
IndexVacuumInfo *info;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
MemoryContext pagedelcontext;
} BTVacState;
/*
* BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
*
@ -1016,9 +1005,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
* avoids double-counting in the case where a single VACUUM command
* requires multiple scans of the index.
*
* Avoid resetting the tuples_removed field here, since it tracks
* information about the VACUUM command, and so must last across each call
* to btvacuumscan().
* Avoid resetting the tuples_removed and pages_newly_deleted fields here,
* since they track information about the VACUUM command, and so must last
* across each call to btvacuumscan().
*
* (Note that pages_free is treated as state about the whole index, not
* the current VACUUM. This is appropriate because RecordFreeIndexPage()
@ -1237,11 +1226,13 @@ backtrack:
}
else if (P_ISHALFDEAD(opaque))
{
/*
* Half-dead leaf page. Try to delete now. Might update
* pages_deleted below.
*/
/* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
attempt_pagedel = true;
/*
* _bt_pagedel() will increment both pages_newly_deleted and
* pages_deleted stats in all cases (barring corruption)
*/
}
else if (P_ISLEAF(opaque))
{
@ -1451,12 +1442,12 @@ backtrack:
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
/*
* We trust the _bt_pagedel return value because it does not include
* any page that a future call here from btvacuumscan is expected to
* count. There will be no double-counting.
* _bt_pagedel maintains the bulk delete stats on our behalf;
* pages_newly_deleted and pages_deleted are likely to be incremented
* during call
*/
Assert(blkno == scanblkno);
stats->pages_deleted += _bt_pagedel(rel, buf);
_bt_pagedel(rel, buf, vstate);
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */

View File

@ -891,6 +891,7 @@ spgvacuumscan(spgBulkDeleteState *bds)
/* Report final stats */
bds->stats->num_pages = num_pages;
bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
bds->stats->pages_free = bds->stats->pages_deleted;
}

View File

@ -63,8 +63,11 @@ typedef struct IndexVacuumInfo
* of which this is just the first field; this provides a way for ambulkdelete
* to communicate additional private data to amvacuumcleanup.
*
* Note: pages_deleted and pages_free refer to free space within the index
* file. Some index AMs may compute num_index_tuples by reference to
* Note: pages_newly_deleted is the number of pages in the index that were
* deleted by the current vacuum operation. pages_deleted and pages_free
* refer to free space within the index file.
*
* Note: Some index AMs may compute num_index_tuples by reference to
* num_heap_tuples, in which case they should copy the estimated_count field
* from IndexVacuumInfo.
*/
@ -74,7 +77,8 @@ typedef struct IndexBulkDeleteResult
bool estimated_count; /* num_index_tuples is an estimate */
double num_index_tuples; /* tuples remaining */
double tuples_removed; /* # removed during vacuum operation */
BlockNumber pages_deleted; /* # unused pages in index */
BlockNumber pages_newly_deleted; /* # pages marked deleted by us */
BlockNumber pages_deleted; /* # pages marked deleted (could be by us) */
BlockNumber pages_free; /* # pages available for reuse */
} IndexBulkDeleteResult;

View File

@ -312,6 +312,20 @@ BTPageIsRecyclable(Page page)
return false;
}
/*
* BTVacState is private nbtree.c state used during VACUUM. It is exported
* for use by page deletion related code in nbtpage.c.
*/
typedef struct BTVacState
{
IndexVacuumInfo *info;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
BTCycleId cycleid;
MemoryContext pagedelcontext;
} BTVacState;
/*
* Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
* page. The high key is not a tuple that is used to visit the heap. It is
@ -1181,7 +1195,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
Relation heapRel,
TM_IndexDeleteOp *delstate);
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf);
extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
/*
* prototypes for functions in nbtsearch.c