diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 18447f404c..963824d050 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8644,6 +8644,39 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + vacuum_failsafe_age (integer) + + vacuum_failsafe_age configuration parameter + + + + + Specifies the maximum age (in transactions) that a table's + pg_class.relfrozenxid + field can attain before VACUUM takes + extraordinary measures to avoid system-wide transaction ID + wraparound failure. This is VACUUM's + strategy of last resort. The failsafe typically triggers + when an autovacuum to prevent transaction ID wraparound has + already been running for some time, though it's possible for + the failsafe to trigger during any VACUUM. + + + When the failsafe is triggered, any cost-based delay that is + in effect will no longer be applied, and further non-essential + maintenance tasks (such as index vacuuming) are bypassed. + + + The default is 1.6 billion transactions. Although users can + set this value anywhere from zero to 2.1 billion, + VACUUM will silently adjust the effective + value to no less than 105% of autovacuum_freeze_max_age. + + + + vacuum_multixact_freeze_table_age (integer) @@ -8690,6 +8723,39 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + vacuum_multixact_failsafe_age (integer) + + vacuum_multixact_failsafe_age configuration parameter + + + + + Specifies the maximum age (in multixacts) that a table's + pg_class.relminmxid + field can attain before VACUUM takes + extraordinary measures to avoid system-wide multixact ID + wraparound failure. This is VACUUM's + strategy of last resort. The failsafe typically triggers when + an autovacuum to prevent transaction ID wraparound has already + been running for some time, though it's possible for the + failsafe to trigger during any VACUUM. 
+ + + When the failsafe is triggered, any cost-based delay that is + in effect will no longer be applied, and further non-essential + maintenance tasks (such as index vacuuming) are bypassed. + + + The default is 1.6 billion multixacts. Although users can set + this value anywhere from zero to 2.1 billion, + VACUUM will silently adjust the effective + value to no less than 105% of autovacuum_multixact_freeze_max_age. + + + + bytea_output (enum) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 1d55d0ecf9..00832b72dc 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -103,6 +103,13 @@ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ +/* + * When a table is small (i.e. smaller than this), save cycles by avoiding + * repeated failsafe checks + */ +#define FAILSAFE_MIN_PAGES \ + ((BlockNumber) (((uint64) 4 * 1024 * 1024 * 1024) / BLCKSZ)) + /* * When a table has no indexes, vacuum the FSM after every 8GB, approximately * (it won't be exact because we only vacuum FSM after processing a heap page @@ -299,6 +306,8 @@ typedef struct LVRelState /* Do index vacuuming/cleanup? */ bool do_index_vacuuming; bool do_index_cleanup; + /* Wraparound failsafe in effect? 
(implies !do_index_vacuuming) */ + bool do_failsafe; /* Buffer access strategy and parallel state */ BufferAccessStrategy bstrategy; @@ -393,12 +402,13 @@ static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, GlobalVisState *vistest, LVPagePruneState *prunestate); static void lazy_vacuum(LVRelState *vacrel); -static void lazy_vacuum_all_indexes(LVRelState *vacrel); +static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static void lazy_vacuum_heap_rel(LVRelState *vacrel); static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int tupindex, Buffer *vmbuffer); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel); +static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void do_parallel_lazy_vacuum_all_indexes(LVRelState *vacrel); static void do_parallel_lazy_cleanup_all_indexes(LVRelState *vacrel); static void do_parallel_vacuum_or_cleanup(LVRelState *vacrel, int nworkers); @@ -544,6 +554,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, &vacrel->indrels); vacrel->do_index_vacuuming = true; vacrel->do_index_cleanup = true; + vacrel->do_failsafe = false; if (params->index_cleanup == VACOPT_TERNARY_DISABLED) { vacrel->do_index_vacuuming = false; @@ -888,6 +899,12 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) vacrel->indstats = (IndexBulkDeleteResult **) palloc0(vacrel->nindexes * sizeof(IndexBulkDeleteResult *)); + /* + * Before beginning scan, check if it's already necessary to apply + * failsafe + */ + lazy_check_wraparound_failsafe(vacrel); + /* * Allocate the space for dead tuples. Note that this handles parallel * VACUUM initialization as part of allocating shared memory space used @@ -1311,12 +1328,17 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) * Periodically perform FSM vacuuming to make newly-freed * space visible on upper FSM pages. Note we have not yet * performed FSM processing for blkno. 
+ * + * Call lazy_check_wraparound_failsafe() here, too, since we + * also don't want to do that too frequently, or too + * infrequently. */ if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) { FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); next_fsm_block_to_vacuum = blkno; + lazy_check_wraparound_failsafe(vacrel); } /* @@ -1450,6 +1472,13 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) * make available in cases where it's possible to truncate the * page's line pointer array. * + * Note: It's not in fact 100% certain that we really will call + * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip + * index vacuuming (and so must skip heap vacuuming). This is + * deemed okay because it only happens in emergencies. (Besides, + * we start recording free space in the FSM once index vacuuming + * has been abandoned.) + * * Note: The one-pass (no indexes) case is only supposed to make * it this far when there were no LP_DEAD items during pruning. */ @@ -1499,7 +1528,7 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive) /* * Vacuum the remainder of the Free Space Map. We must do this whether or - * not there were indexes. + * not there were indexes, and whether or not we bypassed index vacuuming. */ if (blkno > next_fsm_block_to_vacuum) FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); @@ -1953,6 +1982,11 @@ retry: /* * Remove the collected garbage tuples from the table and its indexes. + * + * In rare emergencies, the ongoing VACUUM operation can be made to skip both + * index vacuuming and index cleanup at the point we're called. This avoids + * having the whole system refuse to allocate further XIDs/MultiXactIds due to + * wraparound. 
*/ static void lazy_vacuum(LVRelState *vacrel) @@ -1969,11 +2003,30 @@ lazy_vacuum(LVRelState *vacrel) return; } - /* Okay, we're going to do index vacuuming */ - lazy_vacuum_all_indexes(vacrel); - - /* Remove tuples from heap */ - lazy_vacuum_heap_rel(vacrel); + if (lazy_vacuum_all_indexes(vacrel)) + { + /* + * We successfully completed a round of index vacuuming. Do related + * heap vacuuming now. + */ + lazy_vacuum_heap_rel(vacrel); + } + else + { + /* + * Failsafe case. + * + * we attempted index vacuuming, but didn't finish a full round/full + * index scan. This happens when relfrozenxid or relminmxid is too + * far in the past. + * + * From this point on the VACUUM operation will do no further index + * vacuuming or heap vacuuming. It will do any remaining pruning that + * may be required, plus other heap-related and relation-level + * maintenance tasks. But that's it. + */ + Assert(vacrel->do_failsafe); + } /* * Forget the now-vacuumed tuples -- just press on @@ -1983,10 +2036,17 @@ lazy_vacuum(LVRelState *vacrel) /* * lazy_vacuum_all_indexes() -- Main entry for index vacuuming + * + * Returns true in the common case when all indexes were successfully + * vacuumed. Returns false in rare cases where we determined that the ongoing + * VACUUM operation is at risk of taking too long to finish, leading to + * wraparound failure. 
*/ -static void +static bool lazy_vacuum_all_indexes(LVRelState *vacrel) { + bool allindexes = true; + Assert(!IsParallelWorker()); Assert(vacrel->nindexes > 0); Assert(vacrel->do_index_vacuuming); @@ -1994,6 +2054,13 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); Assert(MultiXactIdIsValid(vacrel->relminmxid)); + /* Precheck for XID wraparound emergencies */ + if (lazy_check_wraparound_failsafe(vacrel)) + { + /* Wraparound emergency -- don't even start an index scan */ + return false; + } + /* Report that we are now vacuuming indexes */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); @@ -2008,26 +2075,50 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples, vacrel); + + if (lazy_check_wraparound_failsafe(vacrel)) + { + /* Wraparound emergency -- end current index scan */ + allindexes = false; + break; + } } } else { /* Outsource everything to parallel variant */ do_parallel_lazy_vacuum_all_indexes(vacrel); + + /* + * Do a postcheck to consider applying wraparound failsafe now. Note + * that parallel VACUUM only gets the precheck and this postcheck. + */ + if (lazy_check_wraparound_failsafe(vacrel)) + allindexes = false; } /* * We delete all LP_DEAD items from the first heap pass in all indexes on - * each call here. This makes the next call to lazy_vacuum_heap_rel() - * safe. + * each call here (except calls where we choose to do the failsafe). This + * makes the next call to lazy_vacuum_heap_rel() safe (except in the event + * of the failsafe triggering, which prevents the next call from taking + * place). */ Assert(vacrel->num_index_scans > 0 || vacrel->dead_tuples->num_tuples == vacrel->lpdead_items); + Assert(allindexes || vacrel->do_failsafe); - /* Increase and report the number of index scans */ + /* + * Increase and report the number of index scans. 
+ * + * We deliberately include the case where we started a round of bulk + * deletes that we weren't able to finish due to the failsafe triggering. + */ vacrel->num_index_scans++; pgstat_progress_update_param(PROGRESS_VACUUM_NUM_INDEX_VACUUMS, vacrel->num_index_scans); + + return allindexes; } /* @@ -2320,6 +2411,67 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelState *vacrel) return (offnum <= maxoff); } +/* + * Trigger the failsafe to avoid wraparound failure when vacrel table has a + * relfrozenxid and/or relminmxid that is dangerously far in the past. + * + * Triggering the failsafe makes the ongoing VACUUM bypass any further index + * vacuuming and heap vacuuming. It also stops the ongoing VACUUM from + * applying any cost-based delay that may be in effect. + * + * Returns true when failsafe has been triggered. + * + * Caller is expected to call here before and after vacuuming each index in + * the case of two-pass VACUUM, or every VACUUM_FSM_EVERY_PAGES blocks in the + * case of no-indexes/one-pass VACUUM. + * + * There is also a precheck before the first pass over the heap begins, which + * is helpful when the failsafe initially triggers during a non-aggressive + * VACUUM -- the automatic aggressive vacuum to prevent wraparound that + * follows can independently trigger the failsafe right away. 
+ */ +static bool +lazy_check_wraparound_failsafe(LVRelState *vacrel) +{ + /* Avoid calling vacuum_xid_failsafe_check() very frequently */ + if (vacrel->num_index_scans == 0 && + vacrel->rel_pages <= FAILSAFE_MIN_PAGES) + return false; + + /* Don't warn more than once per VACUUM */ + if (vacrel->do_failsafe) + return true; + + if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid, + vacrel->relminmxid))) + { + Assert(vacrel->do_index_vacuuming); + Assert(vacrel->do_index_cleanup); + + vacrel->do_index_vacuuming = false; + vacrel->do_index_cleanup = false; + vacrel->do_failsafe = true; + + ereport(WARNING, + (errmsg("abandoned index vacuuming of table \"%s.%s.%s\" as a failsafe after %d index scans", + get_database_name(MyDatabaseId), + vacrel->relnamespace, + vacrel->relname, + vacrel->num_index_scans), + errdetail("table's relfrozenxid or relminmxid is too far in the past"), + errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" + "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); + + /* Stop applying cost limits from this point on */ + VacuumCostActive = false; + VacuumCostBalance = 0; + + return true; + } + + return false; +} + /* * Perform lazy_vacuum_all_indexes() steps in parallel */ @@ -3173,7 +3325,7 @@ lazy_space_alloc(LVRelState *vacrel, int nworkers, BlockNumber nblocks) * be used for an index, so we invoke parallelism only if there are at * least two indexes on a table. 
*/ - if (nworkers >= 0 && vacrel->nindexes > 1) + if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming) { /* * Since parallel workers cannot access data in temporary tables, we diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 25465b05dd..39df05c735 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -62,6 +62,8 @@ int vacuum_freeze_min_age; int vacuum_freeze_table_age; int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; +int vacuum_failsafe_age; +int vacuum_multixact_failsafe_age; /* A few variables that don't seem worth passing around as parameters */ @@ -1134,6 +1136,62 @@ vacuum_set_xid_limits(Relation rel, } } +/* + * vacuum_xid_failsafe_check() -- Used by VACUUM's wraparound failsafe + * mechanism to determine if its table's relfrozenxid and relminmxid are now + * dangerously far in the past. + * + * Input parameters are the target relation's relfrozenxid and relminmxid. + * + * When we return true, VACUUM caller triggers the failsafe. + */ +bool +vacuum_xid_failsafe_check(TransactionId relfrozenxid, MultiXactId relminmxid) +{ + TransactionId xid_skip_limit; + MultiXactId multi_skip_limit; + int skip_index_vacuum; + + Assert(TransactionIdIsNormal(relfrozenxid)); + Assert(MultiXactIdIsValid(relminmxid)); + + /* + * Determine the index skipping age to use. In any case no less than + * autovacuum_freeze_max_age * 1.05. + */ + skip_index_vacuum = Max(vacuum_failsafe_age, autovacuum_freeze_max_age * 1.05); + + xid_skip_limit = ReadNextTransactionId() - skip_index_vacuum; + if (!TransactionIdIsNormal(xid_skip_limit)) + xid_skip_limit = FirstNormalTransactionId; + + if (TransactionIdPrecedes(relfrozenxid, xid_skip_limit)) + { + /* The table's relfrozenxid is too old */ + return true; + } + + /* + * Similar to above, determine the index skipping age to use for + * multixact. In any case no less than autovacuum_multixact_freeze_max_age + * * 1.05. 
+ */ + skip_index_vacuum = Max(vacuum_multixact_failsafe_age, + autovacuum_multixact_freeze_max_age * 1.05); + + multi_skip_limit = ReadNextMultiXactId() - skip_index_vacuum; + if (multi_skip_limit < FirstMultiXactId) + multi_skip_limit = FirstMultiXactId; + + if (MultiXactIdPrecedes(relminmxid, multi_skip_limit)) + { + /* The table's relminmxid is too old */ + return true; + } + + return false; +} + /* * vac_estimate_reltuples() -- estimate the new value for pg_class.reltuples * diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index bdd67fb0bb..bee976bae8 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2657,6 +2657,24 @@ static struct config_int ConfigureNamesInt[] = 0, 0, 1000000, /* see ComputeXidHorizons */ NULL, NULL, NULL }, + { + {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_failsafe_age, + 1600000000, 0, 2100000000, + NULL, NULL, NULL + }, + { + {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_multixact_failsafe_age, + 1600000000, 0, 2100000000, + NULL, NULL, NULL + }, /* * See also CheckRequiredParameterValues() if this parameter changes @@ -3257,7 +3275,10 @@ static struct config_int ConfigureNamesInt[] = NULL }, &autovacuum_freeze_max_age, - /* see pg_resetwal if you change the upper-limit value */ + /* + * see pg_resetwal and vacuum_failsafe_age if you change the + * upper-limit value. 
+ */ 200000000, 100000, 2000000000, NULL, NULL, NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 65f6186966..ff9fa006fe 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -677,6 +677,8 @@ #vacuum_freeze_table_age = 150000000 #vacuum_multixact_freeze_min_age = 5000000 #vacuum_multixact_freeze_table_age = 150000000 +#vacuum_failsafe_age = 1600000000 +#vacuum_multixact_failsafe_age = 1600000000 #bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d029da5ac0..cb27257bb6 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -235,6 +235,8 @@ extern int vacuum_freeze_min_age; extern int vacuum_freeze_table_age; extern int vacuum_multixact_freeze_min_age; extern int vacuum_multixact_freeze_table_age; +extern int vacuum_failsafe_age; +extern int vacuum_multixact_failsafe_age; /* Variables for cost-based parallel vacuum */ extern pg_atomic_uint32 *VacuumSharedCostBalance; @@ -270,6 +272,8 @@ extern void vacuum_set_xid_limits(Relation rel, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit); +extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, + MultiXactId relminmxid); extern void vac_update_datfrozenxid(void); extern void vacuum_delay_point(void); extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple,