From 73f6ec3d3c8d5786c54373e71a096e5acf78e7ca Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Sat, 12 Mar 2022 12:52:38 -0800 Subject: [PATCH] vacuumlazy.c: document vistest and OldestXmin. Explain the relationship between vacuumlazy.c's vistest and OldestXmin cutoffs. These closely related cutoffs are different in subtle but important ways. Also document a closely related rule: we must establish rel_pages _after_ OldestXmin to ensure that no XID < OldestXmin can be missed by lazy_scan_heap(). It's easier to explain these issues by initializing everything together, so consolidate initialization of vacrel state. Now almost every vacrel field is initialized by heap_vacuum_rel(). The only remaining exception is the dead_items array, which is still managed by lazy_scan_heap() due to interactions with how we initialize parallel VACUUM. Also move the process that updates pg_class entries for each index into heap_vacuum_rel(), and adjust related assertions. All pg_class updates now take place after lazy_scan_heap() returns, which seems clearer. Author: Peter Geoghegan Reviewed-By: Andres Freund Discussion: https://postgr.es/m/20211211045710.ljtuu4gfloh754rs@alap3.anarazel.de Discussion: https://postgr.es/m/CAH2-WznYsUxVT156rCQ+q=YD4S4=1M37hWvvHLz-H1pwSM8-Ew@mail.gmail.com --- src/backend/access/heap/vacuumlazy.c | 173 ++++++++++++++------------- 1 file changed, 92 insertions(+), 81 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 40101e0cb8..7982139baa 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -167,9 +167,10 @@ typedef struct LVRelState MultiXactId relminmxid; double old_live_tuples; /* previous value of pg_class.reltuples */ - /* VACUUM operation's cutoff for pruning */ + /* VACUUM operation's cutoffs for freezing and pruning */ TransactionId OldestXmin; - /* VACUUM operation's cutoff for freezing XIDs and MultiXactIds */ + GlobalVisState *vistest; + /* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; /* Are FreezeLimit/MultiXactCutoff still valid? */ @@ -185,8 +186,6 @@ typedef struct LVRelState bool verbose; /* VACUUM VERBOSE? */ /* - * State managed by lazy_scan_heap() follows. - * * dead_items stores TIDs whose index tuples are deleted by index * vacuuming. Each TID points to an LP_DEAD line pointer from a heap page * that has been processed by lazy_scan_prune. Also needed by @@ -252,7 +251,6 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, bool sharelock, Buffer vmbuffer); static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, - GlobalVisState *vistest, LVPagePruneState *prunestate); static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, @@ -281,7 +279,7 @@ static void dead_items_alloc(LVRelState *vacrel, int nworkers); static void dead_items_cleanup(LVRelState *vacrel); static bool heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, TransactionId *visibility_cutoff_xid, bool *all_frozen); -static void update_index_statistics(LVRelState *vacrel); +static void update_relstats_all_indexes(LVRelState *vacrel); static void vacuum_error_callback(void *arg); static void update_vacuum_error_info(LVRelState *vacrel, LVSavedErrInfo *saved_vacrel, @@ -296,7 +294,8 @@ static void restore_vacuum_error_info(LVRelState *vacrel, * * This routine sets things up for and then calls lazy_scan_heap, where * almost all work actually takes place. Finalizes everything after call - * returns by managing rel truncation and updating pg_class statistics. + * returns by managing relation truncation and updating rel's pg_class + * entry. (Also updates pg_class entries for any indexes that need it.) * * At entry, we have already established a transaction and opened * and locked the relation. @@ -468,9 +467,51 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->relminmxid = rel->rd_rel->relminmxid; vacrel->old_live_tuples = rel->rd_rel->reltuples; - /* Set cutoffs for entire VACUUM */ + /* Initialize page counters explicitly (be tidy) */ + vacrel->scanned_pages = 0; + vacrel->frozenskipped_pages = 0; + vacrel->removed_pages = 0; + vacrel->lpdead_item_pages = 0; + vacrel->missed_dead_pages = 0; + vacrel->nonempty_pages = 0; + /* dead_items_alloc allocates vacrel->dead_items later on */ + + /* Allocate/initialize output statistics state */ + vacrel->new_rel_tuples = 0; + vacrel->new_live_tuples = 0; + vacrel->indstats = (IndexBulkDeleteResult **) + palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); + + /* Initialize remaining counters (be tidy) */ + vacrel->num_index_scans = 0; + vacrel->tuples_deleted = 0; + vacrel->lpdead_items = 0; + vacrel->live_tuples = 0; + vacrel->recently_dead_tuples = 0; + vacrel->missed_dead_tuples = 0; + + /* + * Determine the extent of the blocks that we'll scan in lazy_scan_heap, + * and finalize cutoffs used for freezing and pruning in lazy_scan_prune. + * + * We expect vistest will always make heap_page_prune remove any deleted + * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become + * confused about whether a tuple should be frozen or removed. (In the + * future we might want to teach lazy_scan_prune to recompute vistest from + * time to time, to increase the number of dead tuples it can prune away.) + * + * We must determine rel_pages _after_ OldestXmin has been established. + * lazy_scan_heap's physical heap scan (scan of pages < rel_pages) is + * thereby guaranteed to not miss any tuples with XIDs < OldestXmin. These + * XIDs must at least be considered for freezing (though not necessarily + * frozen) during its scan. + */ + vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); vacrel->OldestXmin = OldestXmin; + vacrel->vistest = GlobalVisTestFor(rel); + /* FreezeLimit controls XID freezing (always <= OldestXmin) */ vacrel->FreezeLimit = FreezeLimit; + /* MultiXactCutoff controls MXID freezing */ vacrel->MultiXactCutoff = MultiXactCutoff; /* Track if cutoffs became invalid (possible in !aggressive case only) */ vacrel->freeze_cutoffs_valid = true; @@ -481,21 +522,21 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, */ lazy_scan_heap(vacrel, params->nworkers); - /* Done with indexes */ + /* + * Update pg_class entries for each of rel's indexes where appropriate. + * + * Unlike the later update to rel's pg_class entry, this is not critical. + * Maintains relpages/reltuples statistics used by the planner only. + */ + if (vacrel->do_index_cleanup) + update_relstats_all_indexes(vacrel); + + /* Done with rel's indexes */ vac_close_indexes(vacrel->nindexes, vacrel->indrels, NoLock); - /* - * Optionally truncate the relation. But remember the relation size used - * by lazy_scan_heap for later first. - */ - orig_rel_pages = vacrel->rel_pages; + /* Optionally truncate rel */ if (should_attempt_truncation(vacrel)) - { - update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE, - vacrel->nonempty_pages, - InvalidOffsetNumber); lazy_truncate_heap(vacrel); - } /* Pop the error context stack */ error_context_stack = errcallback.previous; @@ -505,7 +546,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, PROGRESS_VACUUM_PHASE_FINAL_CLEANUP); /* - * Update statistics in pg_class. + * Prepare to update rel's pg_class entry. * * In principle new_live_tuples could be -1 indicating that we (still) * don't know the tuple count. In practice that probably can't happen, @@ -517,22 +558,19 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, */ new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */ new_live_tuples = vacrel->new_live_tuples; - visibilitymap_count(rel, &new_rel_allvisible, NULL); if (new_rel_allvisible > new_rel_pages) new_rel_allvisible = new_rel_pages; /* + * Now actually update rel's pg_class entry. + * * Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid). * We are able to advance relfrozenxid in a non-aggressive VACUUM too, * provided we didn't skip any all-visible (not all-frozen) pages using * the visibility map, and assuming that we didn't fail to get a cleanup * lock that made it unsafe with respect to FreezeLimit (or perhaps our * MultiXactCutoff) established for VACUUM operation. - * - * NB: We must use orig_rel_pages, not vacrel->rel_pages, since we want - * the rel_pages used by lazy_scan_heap, which won't match when we - * happened to truncate the relation afterwards. */ if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages || !vacrel->freeze_cutoffs_valid) @@ -787,7 +825,7 @@ static void lazy_scan_heap(LVRelState *vacrel, int nworkers) { VacDeadItems *dead_items; - BlockNumber nblocks, + BlockNumber nblocks = vacrel->rel_pages, blkno, next_unskippable_block, next_failsafe_block, @@ -800,29 +838,6 @@ lazy_scan_heap(LVRelState *vacrel, int nworkers) PROGRESS_VACUUM_MAX_DEAD_TUPLES }; int64 initprog_val[3]; - GlobalVisState *vistest; - - nblocks = RelationGetNumberOfBlocks(vacrel->rel); - vacrel->rel_pages = nblocks; - vacrel->scanned_pages = 0; - vacrel->frozenskipped_pages = 0; - vacrel->removed_pages = 0; - vacrel->lpdead_item_pages = 0; - vacrel->missed_dead_pages = 0; - vacrel->nonempty_pages = 0; - - /* Initialize instrumentation counters */ - vacrel->num_index_scans = 0; - vacrel->tuples_deleted = 0; - vacrel->lpdead_items = 0; - vacrel->live_tuples = 0; - vacrel->recently_dead_tuples = 0; - vacrel->missed_dead_tuples = 0; - - vistest = GlobalVisTestFor(vacrel->rel); - - vacrel->indstats = (IndexBulkDeleteResult **) - palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); /* * Do failsafe precheck before calling dead_items_alloc. This ensures @@ -880,9 +895,9 @@ lazy_scan_heap(LVRelState *vacrel, int nworkers) * might leave some dead tuples lying around, but the next vacuum will * find them. But even when aggressive *is* set, it's still OK if we miss * a page whose all-frozen marking has just been cleared. Any new XIDs - * just added to that page are necessarily newer than the GlobalXmin we - * computed, so they'll have no effect on the value to which we can safely - * set relfrozenxid. A similar argument applies for MXIDs and relminmxid. + * just added to that page are necessarily >= vacrel->OldestXmin, and so + * they'll have no effect on the value to which we can safely set + * relfrozenxid. A similar argument applies for MXIDs and relminmxid. */ next_unskippable_block = 0; if (vacrel->skipwithvm) @@ -1153,7 +1168,7 @@ lazy_scan_heap(LVRelState *vacrel, int nworkers) * were pruned some time earlier. Also considers freezing XIDs in the * tuple headers of remaining items with storage. */ - lazy_scan_prune(vacrel, buf, blkno, page, vistest, &prunestate); + lazy_scan_prune(vacrel, buf, blkno, page, &prunestate); Assert(!prunestate.all_visible || !prunestate.has_lpdead_items); @@ -1392,15 +1407,11 @@ lazy_scan_heap(LVRelState *vacrel, int nworkers) lazy_cleanup_all_indexes(vacrel); /* - * Free resources managed by dead_items_alloc. This will end parallel - * mode when needed (it must end before updating index statistics as we - * can't write in parallel mode). + * Free resources managed by dead_items_alloc. This ends parallel mode in + * passing when necessary. */ dead_items_cleanup(vacrel); - - /* Update index statistics */ - if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) - update_index_statistics(vacrel); + Assert(!IsInParallelMode()); } /* @@ -1559,7 +1570,6 @@ lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, - GlobalVisState *vistest, LVPagePruneState *prunestate) { Relation rel = vacrel->rel; @@ -1598,7 +1608,7 @@ retry: * lpdead_items's final value can be thought of as the number of tuples * that were deleted from indexes. */ - tuples_deleted = heap_page_prune(rel, buf, vistest, + tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest, InvalidTransactionId, 0, &nnewlpdead, &vacrel->offnum); @@ -2292,8 +2302,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) Assert(vacrel->nindexes > 0); Assert(vacrel->do_index_vacuuming); Assert(vacrel->do_index_cleanup); - Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); - Assert(MultiXactIdIsValid(vacrel->relminmxid)); /* Precheck for XID wraparound emergencies */ if (lazy_check_wraparound_failsafe(vacrel)) @@ -2604,6 +2612,9 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, static bool lazy_check_wraparound_failsafe(LVRelState *vacrel) { + Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); + Assert(MultiXactIdIsValid(vacrel->relminmxid)); + /* Don't warn more than once per VACUUM */ if (vacrel->failsafe_active) return true; @@ -2644,6 +2655,10 @@ lazy_check_wraparound_failsafe(LVRelState *vacrel) static void lazy_cleanup_all_indexes(LVRelState *vacrel) { + double reltuples = vacrel->new_rel_tuples; + bool estimated_count = vacrel->scanned_pages < vacrel->rel_pages; + + Assert(vacrel->do_index_cleanup); Assert(vacrel->nindexes > 0); /* Report that we are now cleaning up indexes */ @@ -2652,10 +2667,6 @@ lazy_cleanup_all_indexes(LVRelState *vacrel) if (!ParallelVacuumIsActive(vacrel)) { - double reltuples = vacrel->new_rel_tuples; - bool estimated_count = - vacrel->scanned_pages < vacrel->rel_pages; - for (int idx = 0; idx < vacrel->nindexes; idx++) { Relation indrel = vacrel->indrels[idx]; @@ -2669,9 +2680,9 @@ lazy_cleanup_all_indexes(LVRelState *vacrel) else { /* Outsource everything to parallel variant */ - parallel_vacuum_cleanup_all_indexes(vacrel->pvs, vacrel->new_rel_tuples, + parallel_vacuum_cleanup_all_indexes(vacrel->pvs, reltuples, vacrel->num_index_scans, - (vacrel->scanned_pages < vacrel->rel_pages)); + estimated_count); } } @@ -2797,27 +2808,23 @@ lazy_cleanup_one_index(Relation indrel, IndexBulkDeleteResult *istat, * Also don't attempt it if we are doing early pruning/vacuuming, because a * scan which cannot find a truncated heap page cannot determine that the * snapshot is too old to read that page. - * - * This is split out so that we can test whether truncation is going to be - * called for before we actually do it. If you change the logic here, be - * careful to depend only on fields that lazy_scan_heap updates on-the-fly. */ static bool should_attempt_truncation(LVRelState *vacrel) { BlockNumber possibly_freeable; - if (!vacrel->do_rel_truncate || vacrel->failsafe_active) + if (!vacrel->do_rel_truncate || vacrel->failsafe_active || + old_snapshot_threshold >= 0) return false; possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages; if (possibly_freeable > 0 && (possibly_freeable >= REL_TRUNCATE_MINIMUM || - possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION) && - old_snapshot_threshold < 0) + possibly_freeable >= vacrel->rel_pages / REL_TRUNCATE_FRACTION)) return true; - else - return false; + + return false; } /* @@ -2835,6 +2842,10 @@ lazy_truncate_heap(LVRelState *vacrel) pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE); + /* Update error traceback information one last time */ + update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_TRUNCATE, + vacrel->nonempty_pages, InvalidOffsetNumber); + /* * Loop until no more truncating can be done. */ @@ -3328,13 +3339,13 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * Update index statistics in pg_class if the statistics are accurate. */ static void -update_index_statistics(LVRelState *vacrel) +update_relstats_all_indexes(LVRelState *vacrel) { Relation *indrels = vacrel->indrels; int nindexes = vacrel->nindexes; IndexBulkDeleteResult **indstats = vacrel->indstats; - Assert(!IsInParallelMode()); + Assert(vacrel->do_index_cleanup); for (int idx = 0; idx < nindexes; idx++) {