diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 84fba9dcb1..2e09fee5ae 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -586,11 +586,11 @@ statistics in the system tables pg_class and pg_database. In particular, the relfrozenxid column of a table's - pg_class row contains the freeze cutoff XID that was used - by the last aggressive VACUUM for that table. All rows - inserted by transactions with XIDs older than this cutoff XID are - guaranteed to have been frozen. Similarly, - the datfrozenxid column of a database's + pg_class row contains the oldest remaining unfrozen + XID at the end of the most recent VACUUM that successfully + advanced relfrozenxid (typically the most recent + aggressive VACUUM). Similarly, the + datfrozenxid column of a database's pg_database row is a lower bound on the unfrozen XIDs appearing in that database — it is just the minimum of the per-table relfrozenxid values within the database. @@ -638,7 +638,11 @@ SELECT datname, age(datfrozenxid) FROM pg_database; set age(relfrozenxid) to a value just a little more than the vacuum_freeze_min_age setting that was used (more by the number of transactions started since the - VACUUM started). If no relfrozenxid-advancing + VACUUM started). VACUUM + will set relfrozenxid to the oldest XID + that remains in the table, so it's possible that the final value + will be much more recent than strictly required. + If no relfrozenxid-advancing VACUUM is issued on the table until autovacuum_freeze_max_age is reached, an autovacuum will soon be forced for the table. diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 74ad445e59..1ee985f633 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6079,10 +6079,12 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * Determine what to do during freezing when a tuple is marked by a * MultiXactId. * - * NB -- this might have the side-effect of creating a new MultiXactId! - * * "flags" is an output value; it's used to tell caller what to do on return. - * Possible flags are: + * + * "mxid_oldest_xid_out" is an output value; it's used to track the oldest + * extant Xid within any Multixact that will remain after freezing executes. + * + * Possible values that we can set in "flags": * FRM_NOOP * don't do anything -- keep existing Xmax * FRM_INVALIDATE_XMAX @@ -6094,12 +6096,17 @@ heap_inplace_update(Relation relation, HeapTuple tuple) * FRM_RETURN_IS_MULTI * The return value is a new MultiXactId to set as new Xmax. * (caller must obtain proper infomask bits using GetMultiXactIdHintBits) + * + * "mxid_oldest_xid_out" is only set when "flags" contains either FRM_NOOP or + * FRM_RETURN_IS_MULTI, since we only leave behind a MultiXactId for these. + * + * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set in "flags". 
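 *
 * (Editor's aside, a sketch rather than text from this patch: the caller in
 * heap_prepare_freeze_tuple dispatches on "flags" roughly as below; see that
 * function's xmax handling later in this diff for the real thing.
 *
 *     newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
 *                                 relfrozenxid, relminmxid,
 *                                 cutoff_xid, cutoff_multi,
 *                                 &flags, &mxid_oldest_xid_out);
 *     if (flags & FRM_INVALIDATE_XMAX)
 *         ... clear xmax entirely ...
 *     else if (flags & FRM_RETURN_IS_XID)
 *         ... keep newxmax as a plain updater XID ...
 *     else if (flags & FRM_RETURN_IS_MULTI)
 *         ... keep newxmax as a replacement MultiXactId ...
 *     else if (flags & FRM_NOOP)
 *         ... keep the existing MultiXactId unchanged ...
 * )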
*/ static TransactionId FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, MultiXactId cutoff_multi, - uint16 *flags) + uint16 *flags, TransactionId *mxid_oldest_xid_out) { TransactionId xid = InvalidTransactionId; int i; @@ -6111,6 +6118,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, bool has_lockers; TransactionId update_xid; bool update_committed; + TransactionId temp_xid_out; *flags = 0; @@ -6147,7 +6155,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask)) { *flags |= FRM_INVALIDATE_XMAX; - xid = InvalidTransactionId; /* not strictly necessary */ + xid = InvalidTransactionId; } else { @@ -6174,7 +6182,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("cannot freeze committed update xid %u", xid))); *flags |= FRM_INVALIDATE_XMAX; - xid = InvalidTransactionId; /* not strictly necessary */ + xid = InvalidTransactionId; } else { @@ -6182,6 +6190,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, } } + /* + * Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid, or + * when no Xids will remain + */ return xid; } @@ -6205,6 +6217,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, /* is there anything older than the cutoff? */ need_replace = false; + temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_NOOP */ for (i = 0; i < nmembers; i++) { if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) @@ -6212,28 +6225,38 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, need_replace = true; break; } + if (TransactionIdPrecedes(members[i].xid, temp_xid_out)) + temp_xid_out = members[i].xid; } /* * In the simplest case, there is no member older than the cutoff; we can - * keep the existing MultiXactId as is. + * keep the existing MultiXactId as-is, avoiding a more expensive second + * pass over the multi */ if (!need_replace) { + /* + * When mxid_oldest_xid_out gets pushed back here it's likely that the + * update Xid was the oldest member, but we don't rely on that + */ *flags |= FRM_NOOP; + *mxid_oldest_xid_out = temp_xid_out; pfree(members); - return InvalidTransactionId; + return multi; } /* - * If the multi needs to be updated, figure out which members do we need - * to keep. + * Do a more thorough second pass over the multi to figure out which + * member XIDs actually need to be kept. Checking the precise status of + * individual members might even show that we don't need to keep anything. */ nnewmembers = 0; newmembers = palloc(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; + temp_xid_out = *mxid_oldest_xid_out; /* init for FRM_RETURN_IS_MULTI */ for (i = 0; i < nmembers; i++) { @@ -6289,7 +6312,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, } /* - * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the + * Since the tuple wasn't totally removed when vacuum pruned, the * update Xid cannot possibly be older than the xid cutoff. The * presence of such a tuple would cause corruption, so be paranoid * and check. @@ -6302,15 +6325,20 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, update_xid, cutoff_xid))); /* - * If we determined that it's an Xid corresponding to an update - * that must be retained, additionally add it to the list of - * members of the new Multi, in case we end up using that. 
(We - * might still decide to use only an update Xid and not a multi, - * but it's easier to maintain the list as we walk the old members - * list.) + * We determined that this is an Xid corresponding to an update + * that must be retained -- add it to new members list for later. + * + * Also consider pushing back temp_xid_out, which is needed when + * we later conclude that a new multi is required (i.e. when we go + * on to set FRM_RETURN_IS_MULTI for our caller because we also + * need to retain a locker that's still running). */ if (TransactionIdIsValid(update_xid)) + { newmembers[nnewmembers++] = members[i]; + if (TransactionIdPrecedes(members[i].xid, temp_xid_out)) + temp_xid_out = members[i].xid; + } } else { @@ -6318,8 +6346,18 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdIsCurrentTransactionId(members[i].xid) || TransactionIdIsInProgress(members[i].xid)) { - /* running locker cannot possibly be older than the cutoff */ + /* + * Running locker cannot possibly be older than the cutoff. + * + * The cutoff is <= VACUUM's OldestXmin, which is also the + * initial value used for top-level relfrozenxid_out tracking + * state. A running locker cannot be older than VACUUM's + * OldestXmin, either, so we don't need a temp_xid_out step. + */ + Assert(TransactionIdIsNormal(members[i].xid)); Assert(!TransactionIdPrecedes(members[i].xid, cutoff_xid)); + Assert(!TransactionIdPrecedes(members[i].xid, + *mxid_oldest_xid_out)); newmembers[nnewmembers++] = members[i]; has_lockers = true; } @@ -6328,11 +6366,16 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, pfree(members); + /* + * Determine what to do with caller's multi based on information gathered + * during our second pass + */ if (nnewmembers == 0) { /* nothing worth keeping!? Tell caller to remove the whole thing */ *flags |= FRM_INVALIDATE_XMAX; xid = InvalidTransactionId; + /* Don't push back mxid_oldest_xid_out -- no Xids will remain */ } else if (TransactionIdIsValid(update_xid) && !has_lockers) { @@ -6348,15 +6391,18 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (update_committed) *flags |= FRM_MARK_COMMITTED; xid = update_xid; + /* Don't push back mxid_oldest_xid_out using FRM_RETURN_IS_XID Xid */ } else { /* * Create a new multixact with the surviving members of the previous - * one, to set as new Xmax in the tuple. + * one, to set as new Xmax in the tuple. The oldest surviving member + * might push back mxid_oldest_xid_out. */ xid = MultiXactIdCreateFromMembers(nnewmembers, newmembers); *flags |= FRM_RETURN_IS_MULTI; + *mxid_oldest_xid_out = temp_xid_out; } pfree(newmembers); @@ -6375,31 +6421,41 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * will be totally frozen after these operations are performed and false if * more freezing will eventually be required. * - * Caller is responsible for setting the offset field, if appropriate. + * Caller must set frz->offset itself, before heap_execute_freeze_tuple call. * * It is assumed that the caller has checked the tuple with * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD * (else we should be removing the tuple, not freezing it). * - * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any + * The *relfrozenxid_out and *relminmxid_out arguments are the current target + * relfrozenxid and relminmxid for VACUUM caller's heap rel. 
Any and all + * unfrozen XIDs or MXIDs that remain in caller's rel after VACUUM finishes + * _must_ have values >= the final relfrozenxid/relminmxid values in pg_class. + * This includes XIDs that remain as MultiXact members from any tuple's xmax. + * Each call here pushes back *relfrozenxid_out and/or *relminmxid_out as + * needed to avoid unsafe final values in rel's authoritative pg_class tuple. + * + * NB: cutoff_xid *must* be <= VACUUM's OldestXmin, to ensure that any * XID older than it could neither be running nor seen as running by any * open transaction. This ensures that the replacement will not change * anyone's idea of the tuple state. - * Similarly, cutoff_multi must be less than or equal to the smallest - * MultiXactId used by any transaction currently open. + * Similarly, cutoff_multi must be <= VACUUM's OldestMxact. * - * If the tuple is in a shared buffer, caller must hold an exclusive lock on - * that buffer. + * NB: This function has side effects: it might allocate a new MultiXactId. + * It will be set as tuple's new xmax when our *frz output is processed within + * heap_execute_freeze_tuple later on. If the tuple is in a shared buffer + * then caller had better have an exclusive lock on it already. * - * NB: It is not enough to set hint bits to indicate something is - * committed/invalid -- they might not be set on a standby, or after crash - * recovery. We really need to remove old xids. + * NB: It is not enough to set hint bits to indicate an XID committed/aborted. + * The *frz WAL record we output completely removes all old XIDs during REDO. */ bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, - xl_heap_freeze_tuple *frz, bool *totally_frozen) + xl_heap_freeze_tuple *frz, bool *totally_frozen, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out) { bool changed = false; bool xmax_already_frozen = false; @@ -6418,7 +6474,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * already a permanent value), while in the block below it is set true to * mean "xmin won't need freezing after what we do to it here" (false * otherwise). In both cases we're allowed to set totally_frozen, as far - * as xmin is concerned. + * as xmin is concerned. Both cases also don't require relfrozenxid_out + * handling, since either way the tuple's xmin will be a permanent value + * once we're done with it. */ xid = HeapTupleHeaderGetXmin(tuple); if (!TransactionIdIsNormal(xid)) @@ -6443,6 +6501,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->t_infomask |= HEAP_XMIN_FROZEN; changed = true; } + else + { + /* xmin to remain unfrozen. Could push back relfrozenxid_out. */ + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + } } /* @@ -6452,7 +6516,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * freezing, too. Also, if a multi needs freezing, we cannot simply take * it out --- if there's a live updater Xid, it needs to be kept. * - * Make sure to keep heap_tuple_needs_freeze in sync with this. + * Make sure to keep heap_tuple_would_freeze in sync with this. 
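 *
 * (Editor's aside: the "push back" idiom used for both output arguments is a
 * simple wraparound-aware ratchet, taken from the code added below:
 *
 *     if (TransactionIdPrecedes(xid, *relfrozenxid_out))
 *         *relfrozenxid_out = xid;
 *
 * It can only move the tracked value toward older XIDs, so the final value
 * is a safe lower bound on every XID that VACUUM leaves behind.)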
*/ xid = HeapTupleHeaderGetRawXmax(tuple); @@ -6460,15 +6524,28 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, { TransactionId newxmax; uint16 flags; + TransactionId mxid_oldest_xid_out = *relfrozenxid_out; newxmax = FreezeMultiXactId(xid, tuple->t_infomask, relfrozenxid, relminmxid, - cutoff_xid, cutoff_multi, &flags); + cutoff_xid, cutoff_multi, + &flags, &mxid_oldest_xid_out); freeze_xmax = (flags & FRM_INVALIDATE_XMAX); if (flags & FRM_RETURN_IS_XID) { + /* + * xmax will become an updater Xid (original MultiXact's updater + * member Xid will be carried forward as a simple Xid in Xmax). + * Might have to ratchet back relfrozenxid_out here, though never + * relminmxid_out. + */ + Assert(!freeze_xmax); + Assert(TransactionIdIsValid(newxmax)); + if (TransactionIdPrecedes(newxmax, *relfrozenxid_out)) + *relfrozenxid_out = newxmax; + /* * NB -- some of these transformations are only valid because we * know the return Xid is a tuple updater (i.e. not merely a @@ -6487,6 +6564,19 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, uint16 newbits; uint16 newbits2; + /* + * xmax is an old MultiXactId that we have to replace with a new + * MultiXactId, to carry forward two or more original member XIDs. + * Might have to ratchet back relfrozenxid_out here, though never + * relminmxid_out. + */ + Assert(!freeze_xmax); + Assert(MultiXactIdIsValid(newxmax)); + Assert(!MultiXactIdPrecedes(newxmax, *relminmxid_out)); + Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out, + *relfrozenxid_out)); + *relfrozenxid_out = mxid_oldest_xid_out; + /* * We can't use GetMultiXactIdHintBits directly on the new multi * here; that routine initializes the masks to all zeroes, which @@ -6503,6 +6593,30 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, changed = true; } + else if (flags & FRM_NOOP) + { + /* + * xmax is a MultiXactId, and nothing about it changes for now. + * Might have to ratchet back relminmxid_out, relfrozenxid_out, or + * both together. + */ + Assert(!freeze_xmax); + Assert(MultiXactIdIsValid(newxmax) && xid == newxmax); + Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out, + *relfrozenxid_out)); + if (MultiXactIdPrecedes(xid, *relminmxid_out)) + *relminmxid_out = xid; + *relfrozenxid_out = mxid_oldest_xid_out; + } + else + { + /* + * Keeping nothing (neither an Xid nor a MultiXactId) in xmax. + * Won't have to ratchet back relminmxid_out or relfrozenxid_out. + */ + Assert(freeze_xmax); + Assert(!TransactionIdIsValid(newxmax)); + } } else if (TransactionIdIsNormal(xid)) { @@ -6527,15 +6641,21 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, errmsg_internal("cannot freeze committed xmax %u", xid))); freeze_xmax = true; + /* No need for relfrozenxid_out handling, since we'll freeze xmax */ } else + { freeze_xmax = false; + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + } } else if ((tuple->t_infomask & HEAP_XMAX_INVALID) || !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) { freeze_xmax = false; xmax_already_frozen = true; + /* No need for relfrozenxid_out handling for already-frozen xmax */ } else ereport(ERROR, @@ -6576,6 +6696,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * was removed in PostgreSQL 9.0. Note that if we were to respect * cutoff_xid here, we'd need to make surely to clear totally_frozen * when we skipped freezing on that basis. + * + * No need for relfrozenxid_out handling, since we always freeze xvac. 
*/ if (TransactionIdIsNormal(xid)) { @@ -6653,11 +6775,14 @@ heap_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple frz; bool do_freeze; bool tuple_totally_frozen; + TransactionId relfrozenxid_out = cutoff_xid; + MultiXactId relminmxid_out = cutoff_multi; do_freeze = heap_prepare_freeze_tuple(tuple, relfrozenxid, relminmxid, cutoff_xid, cutoff_multi, - &frz, &tuple_totally_frozen); + &frz, &tuple_totally_frozen, + &relfrozenxid_out, &relminmxid_out); /* * Note that because this is not a WAL-logged operation, we don't need to @@ -7036,9 +7161,7 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, * heap_tuple_needs_eventual_freeze * * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac) - * will eventually require freezing. Similar to heap_tuple_needs_freeze, - * but there's no cutoff, since we're trying to figure out whether freezing - * will ever be needed, not whether it's needed now. + * will eventually require freezing (if tuple isn't removed by pruning first). */ bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) @@ -7082,87 +7205,106 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) } /* - * heap_tuple_needs_freeze + * heap_tuple_would_freeze * - * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac) - * are older than the specified cutoff XID or MultiXactId. If so, return true. + * Return value indicates if heap_prepare_freeze_tuple sibling function would + * freeze any of the XID/XMID fields from the tuple, given the same cutoffs. + * We must also deal with dead tuples here, since (xmin, xmax, xvac) fields + * could be processed by pruning away the whole tuple instead of freezing. * - * It doesn't matter whether the tuple is alive or dead, we are checking - * to see if a tuple needs to be removed or frozen to avoid wraparound. - * - * NB: Cannot rely on hint bits here, they might not be set after a crash or - * on a standby. + * The *relfrozenxid_out and *relminmxid_out input/output arguments work just + * like the heap_prepare_freeze_tuple arguments that they're based on. We + * never freeze here, which makes tracking the oldest extant XID/MXID simple. */ bool -heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi) +heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, + MultiXactId cutoff_multi, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out) { TransactionId xid; + MultiXactId multi; + bool would_freeze = false; + /* First deal with xmin */ xid = HeapTupleHeaderGetXmin(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; - - /* - * The considerations for multixacts are complicated; look at - * heap_prepare_freeze_tuple for justifications. This routine had better - * be in sync with that one! 
- */ - if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) + if (TransactionIdIsNormal(xid)) { - MultiXactId multi; + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, cutoff_xid)) + would_freeze = true; + } + /* Now deal with xmax */ + xid = InvalidTransactionId; + multi = InvalidMultiXactId; + if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) multi = HeapTupleHeaderGetRawXmax(tuple); - if (!MultiXactIdIsValid(multi)) - { - /* no xmax set, ignore */ - ; - } - else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) - return true; - else if (MultiXactIdPrecedes(multi, cutoff_multi)) - return true; - else - { - MultiXactMember *members; - int nmembers; - int i; + else + xid = HeapTupleHeaderGetRawXmax(tuple); - /* need to check whether any member of the mxact is too old */ - - nmembers = GetMultiXactIdMembers(multi, &members, false, - HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - - for (i = 0; i < nmembers; i++) - { - if (TransactionIdPrecedes(members[i].xid, cutoff_xid)) - { - pfree(members); - return true; - } - } - if (nmembers > 0) - pfree(members); - } + if (TransactionIdIsNormal(xid)) + { + /* xmax is a non-permanent XID */ + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, cutoff_xid)) + would_freeze = true; + } + else if (!MultiXactIdIsValid(multi)) + { + /* xmax is a permanent XID or invalid MultiXactId/XID */ + } + else if (HEAP_LOCKED_UPGRADED(tuple->t_infomask)) + { + /* xmax is a pg_upgrade'd MultiXact, which can't have updater XID */ + if (MultiXactIdPrecedes(multi, *relminmxid_out)) + *relminmxid_out = multi; + /* heap_prepare_freeze_tuple always freezes pg_upgrade'd xmax */ + would_freeze = true; } else { - xid = HeapTupleHeaderGetRawXmax(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + /* xmax is a MultiXactId that may have an updater XID */ + MultiXactMember *members; + int nmembers; + + if (MultiXactIdPrecedes(multi, *relminmxid_out)) + *relminmxid_out = multi; + if (MultiXactIdPrecedes(multi, cutoff_multi)) + would_freeze = true; + + /* need to check whether any member of the mxact is old */ + nmembers = GetMultiXactIdMembers(multi, &members, false, + HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); + + for (int i = 0; i < nmembers; i++) + { + xid = members[i].xid; + Assert(TransactionIdIsNormal(xid)); + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + if (TransactionIdPrecedes(xid, cutoff_xid)) + would_freeze = true; + } + if (nmembers > 0) + pfree(members); } if (tuple->t_infomask & HEAP_MOVED) { xid = HeapTupleHeaderGetXvac(tuple); - if (TransactionIdIsNormal(xid) && - TransactionIdPrecedes(xid, cutoff_xid)) - return true; + if (TransactionIdIsNormal(xid)) + { + if (TransactionIdPrecedes(xid, *relfrozenxid_out)) + *relfrozenxid_out = xid; + /* heap_prepare_freeze_tuple always freezes xvac */ + would_freeze = true; + } } - return false; + return would_freeze; } /* diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 06b523a01f..826982f70b 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -144,7 +144,7 @@ typedef struct LVRelState Relation *indrels; int nindexes; - /* Aggressive VACUUM (scan all unfrozen pages)? */ + /* Aggressive VACUUM? (must set relfrozenxid >= FreezeLimit) */ bool aggressive; /* Use visibility map to skip? 
(disabled by DISABLE_PAGE_SKIPPING) */ bool skipwithvm; @@ -173,8 +173,9 @@ typedef struct LVRelState /* VACUUM operation's target cutoffs for freezing XIDs and MultiXactIds */ TransactionId FreezeLimit; MultiXactId MultiXactCutoff; - /* Are FreezeLimit/MultiXactCutoff still valid? */ - bool freeze_cutoffs_valid; + /* Tracks oldest extant XID/MXID for setting relfrozenxid/relminmxid */ + TransactionId NewRelfrozenXid; + MultiXactId NewRelminMxid; /* Error reporting state */ char *relnamespace; @@ -313,7 +314,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, minmulti_updated; TransactionId OldestXmin, FreezeLimit; - MultiXactId MultiXactCutoff; + MultiXactId OldestMxact, + MultiXactCutoff; BlockNumber orig_rel_pages, new_rel_pages, new_rel_allvisible; @@ -345,20 +347,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* * Get OldestXmin cutoff, which is used to determine which deleted tuples * are considered DEAD, not just RECENTLY_DEAD. Also get related cutoffs - * used to determine which XIDs/MultiXactIds will be frozen. - * - * If this is an aggressive VACUUM, then we're strictly required to freeze - * any and all XIDs from before FreezeLimit, so that we will be able to - * safely advance relfrozenxid up to FreezeLimit below (we must be able to - * advance relminmxid up to MultiXactCutoff, too). + * used to determine which XIDs/MultiXactIds will be frozen. If this is + * an aggressive VACUUM then lazy_scan_heap cannot leave behind unfrozen + * XIDs < FreezeLimit (all MXIDs < MultiXactCutoff also need to go away). */ aggressive = vacuum_set_xid_limits(rel, params->freeze_min_age, params->freeze_table_age, params->multixact_freeze_min_age, params->multixact_freeze_table_age, - &OldestXmin, &FreezeLimit, - &MultiXactCutoff); + &OldestXmin, &OldestMxact, + &FreezeLimit, &MultiXactCutoff); skipwithvm = true; if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) @@ -505,10 +504,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->vistest = GlobalVisTestFor(rel); /* FreezeLimit controls XID freezing (always <= OldestXmin) */ vacrel->FreezeLimit = FreezeLimit; - /* MultiXactCutoff controls MXID freezing */ + /* MultiXactCutoff controls MXID freezing (always <= OldestMxact) */ vacrel->MultiXactCutoff = MultiXactCutoff; - /* Track if cutoffs became invalid (possible in !aggressive case only) */ - vacrel->freeze_cutoffs_valid = true; + /* Initialize state used to track oldest extant XID/XMID */ + vacrel->NewRelfrozenXid = OldestXmin; + vacrel->NewRelminMxid = OldestMxact; /* * Call lazy_scan_heap to perform all required heap pruning, index @@ -542,13 +542,33 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* * Prepare to update rel's pg_class entry. * - * In principle new_live_tuples could be -1 indicating that we (still) - * don't know the tuple count. In practice that probably can't happen, - * since we'd surely have scanned some pages if the table is new and - * nonempty. - * + * Aggressive VACUUMs must always be able to advance relfrozenxid to a + * value >= FreezeLimit, and relminmxid to a value >= MultiXactCutoff. + * Non-aggressive VACUUMs may advance them by any amount, or not at all. + */ + Assert(vacrel->NewRelfrozenXid == OldestXmin || + TransactionIdPrecedesOrEquals(aggressive ? FreezeLimit : + vacrel->relfrozenxid, + vacrel->NewRelfrozenXid)); + Assert(vacrel->NewRelminMxid == OldestMxact || + MultiXactIdPrecedesOrEquals(aggressive ? 
MultiXactCutoff : + vacrel->relminmxid, + vacrel->NewRelminMxid)); + if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages) + { + /* + * Must keep original relfrozenxid in a non-aggressive VACUUM that + * had to skip an all-visible page. The state that tracks new + * values will have missed unfrozen XIDs from the pages we skipped. + */ + Assert(!aggressive); + vacrel->NewRelfrozenXid = InvalidTransactionId; + vacrel->NewRelminMxid = InvalidMultiXactId; + } + + /* * For safety, clamp relallvisible to be not more than what we're setting - * relpages to. + * pg_class.relpages to */ new_rel_pages = vacrel->rel_pages; /* After possible rel truncation */ visibilitymap_count(rel, &new_rel_allvisible, NULL); @@ -558,33 +578,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, /* * Now actually update rel's pg_class entry. * - * Aggressive VACUUM must reliably advance relfrozenxid (and relminmxid). - * We are able to advance relfrozenxid in a non-aggressive VACUUM too, - * provided we didn't skip any all-visible (not all-frozen) pages using - * the visibility map, and assuming that we didn't fail to get a cleanup - * lock that made it unsafe with respect to FreezeLimit (or perhaps our - * MultiXactCutoff) established for VACUUM operation. + * In principle new_live_tuples could be -1 indicating that we (still) + * don't know the tuple count. In practice that can't happen, since we + * scan every page that isn't skipped using the visibility map. */ - if (vacrel->scanned_pages + vacrel->frozenskipped_pages < orig_rel_pages || - !vacrel->freeze_cutoffs_valid) - { - /* Cannot advance relfrozenxid/relminmxid */ - Assert(!aggressive); - frozenxid_updated = minmulti_updated = false; - vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples, - new_rel_allvisible, vacrel->nindexes > 0, - InvalidTransactionId, InvalidMultiXactId, - NULL, NULL, false); - } - else - { - Assert(vacrel->scanned_pages + vacrel->frozenskipped_pages == - orig_rel_pages); - vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples, - new_rel_allvisible, vacrel->nindexes > 0, - FreezeLimit, MultiXactCutoff, - &frozenxid_updated, &minmulti_updated, false); - } + vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples, + new_rel_allvisible, vacrel->nindexes > 0, + vacrel->NewRelfrozenXid, vacrel->NewRelminMxid, + &frozenxid_updated, &minmulti_updated, false); /* * Report results to the stats collector, too. 
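The next hunk reports how far relfrozenxid/relminmxid advanced using a plain
signed 32-bit difference. A standalone sketch of why that arithmetic is
wraparound-safe (editor's illustration; xid_distance is a made-up helper, not
a PostgreSQL function):

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t TransactionId;

	/* How far "new" is ahead of "old", modulo 2^32 */
	static int32_t
	xid_distance(TransactionId old_xid, TransactionId new_xid)
	{
		return (int32_t) (new_xid - old_xid);
	}

	int
	main(void)
	{
		printf("%d\n", xid_distance(100, 150));			/* 50 */
		printf("%d\n", xid_distance(4294967291u, 5));	/* 10: crosses wraparound */
		return 0;
	}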
@@ -692,17 +693,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, OldestXmin, diff); if (frozenxid_updated) { - diff = (int32) (FreezeLimit - vacrel->relfrozenxid); + diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid); appendStringInfo(&buf, _("new relfrozenxid: %u, which is %d xids ahead of previous value\n"), - FreezeLimit, diff); + vacrel->NewRelfrozenXid, diff); } if (minmulti_updated) { - diff = (int32) (MultiXactCutoff - vacrel->relminmxid); + diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid); appendStringInfo(&buf, _("new relminmxid: %u, which is %d mxids ahead of previous value\n"), - MultiXactCutoff, diff); + vacrel->NewRelminMxid, diff); } if (orig_rel_pages > 0) { @@ -1582,6 +1583,8 @@ lazy_scan_prune(LVRelState *vacrel, recently_dead_tuples; int nnewlpdead; int nfrozen; + TransactionId NewRelfrozenXid; + MultiXactId NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; xl_heap_freeze_tuple frozen[MaxHeapTuplesPerPage]; @@ -1591,7 +1594,9 @@ lazy_scan_prune(LVRelState *vacrel, retry: - /* Initialize (or reset) page-level counters */ + /* Initialize (or reset) page-level state */ + NewRelfrozenXid = vacrel->NewRelfrozenXid; + NewRelminMxid = vacrel->NewRelminMxid; tuples_deleted = 0; lpdead_items = 0; live_tuples = 0; @@ -1798,8 +1803,8 @@ retry: vacrel->relminmxid, vacrel->FreezeLimit, vacrel->MultiXactCutoff, - &frozen[nfrozen], - &tuple_totally_frozen)) + &frozen[nfrozen], &tuple_totally_frozen, + &NewRelfrozenXid, &NewRelminMxid)) { /* Will execute freeze below */ frozen[nfrozen++].offset = offnum; @@ -1813,13 +1818,16 @@ retry: prunestate->all_frozen = false; } + vacrel->offnum = InvalidOffsetNumber; + /* * We have now divided every item on the page into either an LP_DEAD item * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple * that remains and needs to be considered for freezing now (LP_UNUSED and * LP_REDIRECT items also remain, but are of no further interest to us). */ - vacrel->offnum = InvalidOffsetNumber; + vacrel->NewRelfrozenXid = NewRelfrozenXid; + vacrel->NewRelminMxid = NewRelminMxid; /* * Consider the need to freeze any items with tuple storage from the page @@ -1969,6 +1977,8 @@ lazy_scan_noprune(LVRelState *vacrel, recently_dead_tuples, missed_dead_tuples; HeapTupleHeader tupleheader; + TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid; + MultiXactId NewRelminMxid = vacrel->NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; Assert(BufferGetBlockNumber(buf) == blkno); @@ -2013,22 +2023,37 @@ lazy_scan_noprune(LVRelState *vacrel, *hastup = true; /* page prevents rel truncation */ tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); - if (heap_tuple_needs_freeze(tupleheader, + if (heap_tuple_would_freeze(tupleheader, vacrel->FreezeLimit, - vacrel->MultiXactCutoff)) + vacrel->MultiXactCutoff, + &NewRelfrozenXid, &NewRelminMxid)) { + /* Tuple with XID < FreezeLimit (or MXID < MultiXactCutoff) */ if (vacrel->aggressive) { - /* Going to have to get cleanup lock for lazy_scan_prune */ + /* + * Aggressive VACUUMs must always be able to advance rel's + * relfrozenxid to a value >= FreezeLimit (and be able to + * advance rel's relminmxid to a value >= MultiXactCutoff). + * The ongoing aggressive VACUUM won't be able to do that + * unless it can freeze an XID (or XMID) from this tuple now. + * + * The only safe option is to have caller perform processing + * of this page using lazy_scan_prune. Caller might have to + * wait a while for a cleanup lock, but it can't be helped. 
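	 *
	 * (Editor's aside -- the caller's fallback path in lazy_scan_heap,
	 * paraphrased rather than quoted:
	 *
	 *     if (!ConditionalLockBufferForCleanup(buf))
	 *     {
	 *         LockBuffer(buf, BUFFER_LOCK_SHARE);
	 *         if (lazy_scan_noprune(vacrel, buf, blkno, page, ...))
	 *             continue;        -- reduced processing sufficed
	 *         -- aggressive case: upgrade to a cleanup lock, waiting if need be
	 *         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	 *         LockBufferForCleanup(buf);
	 *     }
	 *     lazy_scan_prune(vacrel, buf, blkno, page, ...);
	 * )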
+ */ vacrel->offnum = InvalidOffsetNumber; return false; } /* - * Current non-aggressive VACUUM operation definitely won't be - * able to advance relfrozenxid or relminmxid + * Non-aggressive VACUUMs are under no obligation to advance + * relfrozenxid (even by one XID). We can be much laxer here. + * + * Currently we always just accept an older final relfrozenxid + * and/or relminmxid value. We never make caller wait or work a + * little harder, even when it likely makes sense to do so. */ - vacrel->freeze_cutoffs_valid = false; } ItemPointerSet(&(tuple.t_self), blkno, offnum); @@ -2078,9 +2103,14 @@ lazy_scan_noprune(LVRelState *vacrel, vacrel->offnum = InvalidOffsetNumber; /* - * Now save details of the LP_DEAD items from the page in vacrel (though - * only when VACUUM uses two-pass strategy) + * By here we know for sure that caller can put off freezing and pruning + * this particular page until the next VACUUM. Remember its details now. + * (lazy_scan_prune expects a clean slate, so we have to do this last.) */ + vacrel->NewRelfrozenXid = NewRelfrozenXid; + vacrel->NewRelminMxid = NewRelminMxid; + + /* Save any LP_DEAD items found on the page in dead_items array */ if (vacrel->nindexes == 0) { /* Using one-pass strategy (since table has no indexes) */ diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index cd19e35319..322d6bb2f1 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -806,9 +806,10 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, Form_pg_class relform; TupleDesc oldTupDesc PG_USED_FOR_ASSERTS_ONLY; TupleDesc newTupDesc PG_USED_FOR_ASSERTS_ONLY; - TransactionId OldestXmin; - TransactionId FreezeXid; - MultiXactId MultiXactCutoff; + TransactionId OldestXmin, + FreezeXid; + MultiXactId OldestMxact, + MultiXactCutoff; bool use_sort; double num_tuples = 0, tups_vacuumed = 0, @@ -896,8 +897,8 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, * Since we're going to rewrite the whole table anyway, there's no reason * not to be aggressive about this. */ - vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, - &OldestXmin, &FreezeXid, &MultiXactCutoff); + vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, &OldestXmin, &OldestMxact, + &FreezeXid, &MultiXactCutoff); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 50a4a612e5..deec4887be 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -945,14 +945,22 @@ get_all_vacuum_rels(int options) * The output parameters are: * - oldestXmin is the Xid below which tuples deleted by any xact (that * committed) should be considered DEAD, not just RECENTLY_DEAD. - * - freezeLimit is the Xid below which all Xids are replaced by - * FrozenTransactionId during vacuum. - * - multiXactCutoff is the value below which all MultiXactIds are removed - * from Xmax. + * - oldestMxact is the Mxid below which MultiXacts are definitely not + * seen as visible by any running transaction. + * - freezeLimit is the Xid below which all Xids are definitely replaced by + * FrozenTransactionId during aggressive vacuums. + * - multiXactCutoff is the value below which all MultiXactIds are definitely + * removed from Xmax during aggressive vacuums. * * Return value indicates if vacuumlazy.c caller should make its VACUUM * operation aggressive. 
An aggressive VACUUM must advance relfrozenxid up to - * FreezeLimit, and relminmxid up to multiXactCutoff. + * FreezeLimit (at a minimum), and relminmxid up to multiXactCutoff (at a + * minimum). + * + * oldestXmin and oldestMxact are the most recent values that can ever be + * passed to vac_update_relstats() as frozenxid and minmulti arguments by our + * vacuumlazy.c caller later on. These values should be passed when it turns + * out that VACUUM will leave no unfrozen XIDs/XMIDs behind in the table. */ bool vacuum_set_xid_limits(Relation rel, @@ -961,6 +969,7 @@ vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff) { @@ -969,7 +978,6 @@ vacuum_set_xid_limits(Relation rel, int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; - MultiXactId oldestMxact; MultiXactId mxactLimit; MultiXactId safeMxactLimit; int freezetable; @@ -1065,9 +1073,11 @@ vacuum_set_xid_limits(Relation rel, effective_multixact_freeze_max_age / 2); Assert(mxid_freezemin >= 0); + /* Remember for caller */ + *oldestMxact = GetOldestMultiXactId(); + /* compute the cutoff multi, being careful to generate a valid value */ - oldestMxact = GetOldestMultiXactId(); - mxactLimit = oldestMxact - mxid_freezemin; + mxactLimit = *oldestMxact - mxid_freezemin; if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; @@ -1082,8 +1092,8 @@ vacuum_set_xid_limits(Relation rel, (errmsg("oldest multixact is far in the past"), errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); /* Use the safe limit, unless an older mxact is still running */ - if (MultiXactIdPrecedes(oldestMxact, safeMxactLimit)) - mxactLimit = oldestMxact; + if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit)) + mxactLimit = *oldestMxact; else mxactLimit = safeMxactLimit; } @@ -1390,12 +1400,9 @@ vac_update_relstats(Relation relation, * Update relfrozenxid, unless caller passed InvalidTransactionId * indicating it has no new data. * - * Ordinarily, we don't let relfrozenxid go backwards: if things are - * working correctly, the only way the new frozenxid could be older would - * be if a previous VACUUM was done with a tighter freeze_min_age, in - * which case we don't want to forget the work it already did. However, - * if the stored relfrozenxid is "in the future", then it must be corrupt - * and it seems best to overwrite it with the cutoff we used this time. + * Ordinarily, we don't let relfrozenxid go backwards. However, if the + * stored relfrozenxid is "in the future" then it seems best to assume + * it's corrupt, and overwrite with the oldest remaining XID in the table. * This should match vac_update_datfrozenxid() concerning what we consider * to be "in the future". 
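	 *
	 * (Editor's aside -- the guard being described, lightly paraphrased from
	 * vac_update_relstats:
	 *
	 *     if (TransactionIdIsNormal(frozenxid) &&
	 *         pgcform->relfrozenxid != frozenxid &&
	 *         (TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
	 *          TransactionIdPrecedes(ReadNextTransactionId(),
	 *                                pgcform->relfrozenxid)))
	 *         pgcform->relfrozenxid = frozenxid;
	 *
	 * The second TransactionIdPrecedes test is the "in the future" check.)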
*/ diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index b46ab7d739..4403f01e13 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -167,8 +167,10 @@ extern void heap_inplace_update(Relation relation, HeapTuple tuple); extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi); -extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, - MultiXactId cutoff_multi); +extern bool heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, + MultiXactId cutoff_multi, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 5c47fdcec8..2d8a7f6270 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -410,7 +410,9 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, - bool *totally_frozen); + bool *totally_frozen, + TransactionId *relfrozenxid_out, + MultiXactId *relminmxid_out); extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index d64f6268f2..ead88edda7 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -291,6 +291,7 @@ extern bool vacuum_set_xid_limits(Relation rel, int multixact_freeze_min_age, int multixact_freeze_table_age, TransactionId *oldestXmin, + MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff); extern bool vacuum_xid_failsafe_check(TransactionId relfrozenxid, diff --git a/src/test/isolation/expected/vacuum-no-cleanup-lock.out b/src/test/isolation/expected/vacuum-no-cleanup-lock.out new file mode 100644 index 0000000000..f7bc93e8f1 --- /dev/null +++ b/src/test/isolation/expected/vacuum-no-cleanup-lock.out @@ -0,0 +1,189 @@ +Parsed test spec with 4 sessions + +starting permutation: vacuumer_pg_class_stats dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + + +starting permutation: vacuumer_pg_class_stats dml_insert pinholder_cursor vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class 
WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: vacuumer_pg_class_stats pinholder_cursor dml_insert dml_delete dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step dml_delete: + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: vacuumer_pg_class_stats dml_insert dml_delete pinholder_cursor dml_insert vacuumer_nonaggressive_vacuum vacuumer_pg_class_stats pinholder_commit +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 20 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step dml_delete: + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_insert: + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step vacuumer_pg_class_stats: + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; + +relpages|reltuples +--------+--------- + 1| 21 +(1 row) + +step pinholder_commit: + COMMIT; + + +starting permutation: dml_begin dml_other_begin dml_key_share dml_other_key_share vacuumer_nonaggressive_vacuum pinholder_cursor dml_other_update dml_commit dml_other_commit vacuumer_nonaggressive_vacuum pinholder_commit vacuumer_nonaggressive_vacuum +step dml_begin: BEGIN; +step dml_other_begin: BEGIN; +step dml_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; +id +-- + 3 +(1 row) + +step dml_other_key_share: SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; +id +-- + 3 +(1 row) + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step pinholder_cursor: + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; + +dummy +----- + 1 +(1 row) + +step dml_other_update: UPDATE smalltbl SET t = 'u' WHERE id = 3; +step dml_commit: COMMIT; +step dml_other_commit: COMMIT; +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + +step pinholder_commit: + COMMIT; + +step vacuumer_nonaggressive_vacuum: + VACUUM smalltbl; + diff --git a/src/test/isolation/expected/vacuum-reltuples.out b/src/test/isolation/expected/vacuum-reltuples.out deleted file mode 100644 index ce55376e7f..0000000000 --- a/src/test/isolation/expected/vacuum-reltuples.out +++ /dev/null @@ -1,67 +0,0 @@ -Parsed test spec with 2 sessions - -starting permutation: modify vac stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step vac: - vacuum smalltbl; - -step stats: - select relpages, 
reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - - -starting permutation: modify open fetch1 vac close stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step open: - begin; - declare c1 cursor for select 1 as dummy from smalltbl; - -step fetch1: - fetch next from c1; - -dummy ------ - 1 -(1 row) - -step vac: - vacuum smalltbl; - -step close: - commit; - -step stats: - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - - -starting permutation: modify vac stats -step modify: - insert into smalltbl select max(id)+1 from smalltbl; - -step vac: - vacuum smalltbl; - -step stats: - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; - -relpages|reltuples ---------+--------- - 1| 21 -(1 row) - diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 00749a40bd..a48caae228 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -84,7 +84,7 @@ test: alter-table-4 test: create-trigger test: sequence-ddl test: async-notify -test: vacuum-reltuples +test: vacuum-no-cleanup-lock test: timeouts test: vacuum-concurrent-drop test: vacuum-conflict diff --git a/src/test/isolation/specs/vacuum-no-cleanup-lock.spec b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec new file mode 100644 index 0000000000..a88be66de5 --- /dev/null +++ b/src/test/isolation/specs/vacuum-no-cleanup-lock.spec @@ -0,0 +1,150 @@ +# Test for vacuum's reduced processing of heap pages (used for any heap page +# where a cleanup lock isn't immediately available) +# +# Debugging tip: Change VACUUM to VACUUM VERBOSE to get feedback on what's +# really going on + +# Use name type here to avoid TOAST table: +setup +{ + CREATE TABLE smalltbl AS SELECT i AS id, 't'::name AS t FROM generate_series(1,20) i; + ALTER TABLE smalltbl SET (autovacuum_enabled = off); + ALTER TABLE smalltbl ADD PRIMARY KEY (id); +} +setup +{ + VACUUM ANALYZE smalltbl; +} + +teardown +{ + DROP TABLE smalltbl; +} + +# This session holds a pin on smalltbl's only heap page: +session pinholder +step pinholder_cursor +{ + BEGIN; + DECLARE c1 CURSOR FOR SELECT 1 AS dummy FROM smalltbl; + FETCH NEXT FROM c1; +} +step pinholder_commit +{ + COMMIT; +} + +# This session inserts and deletes tuples, potentially affecting reltuples: +session dml +step dml_insert +{ + INSERT INTO smalltbl SELECT max(id) + 1 FROM smalltbl; +} +step dml_delete +{ + DELETE FROM smalltbl WHERE id = (SELECT min(id) FROM smalltbl); +} +step dml_begin { BEGIN; } +step dml_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; } +step dml_commit { COMMIT; } + +# Needed for Multixact test: +session dml_other +step dml_other_begin { BEGIN; } +step dml_other_key_share { SELECT id FROM smalltbl WHERE id = 3 FOR KEY SHARE; } +step dml_other_update { UPDATE smalltbl SET t = 'u' WHERE id = 3; } +step dml_other_commit { COMMIT; } + +# This session runs non-aggressive VACUUM, but with maximally aggressive +# cutoffs for tuple freezing (e.g., FreezeLimit == OldestXmin): +session vacuumer +setup +{ + SET vacuum_freeze_min_age = 0; + SET vacuum_multixact_freeze_min_age = 0; +} +step vacuumer_nonaggressive_vacuum +{ + VACUUM smalltbl; +} +step vacuumer_pg_class_stats +{ + SELECT relpages, reltuples FROM pg_class WHERE oid = 'smalltbl'::regclass; +} + +# Test VACUUM's reltuples counting mechanism. 
+# +# Final pg_class.reltuples should never be affected by VACUUM's inability to +# get a cleanup lock on any page, except to the extent that any cleanup lock +# contention changes the number of tuples that remain ("missed dead" tuples +# are counted in reltuples, much like "recently dead" tuples). + +# Easy case: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + vacuumer_nonaggressive_vacuum + vacuumer_pg_class_stats # End with 21 tuples + +# Harder case -- count 21 tuples at the end (like last time), but with cleanup +# lock contention this time: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + pinholder_cursor + vacuumer_nonaggressive_vacuum + vacuumer_pg_class_stats # End with 21 tuples + pinholder_commit # order doesn't matter + +# Same as "harder case", but vary the order, and delete an inserted row: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + pinholder_cursor + dml_insert + dml_delete + dml_insert + vacuumer_nonaggressive_vacuum + # reltuples is 21 here again -- "recently dead" tuple won't be included in + # count here: + vacuumer_pg_class_stats + pinholder_commit # order doesn't matter + +# Same as "harder case", but initial insert and delete before cursor: +permutation + vacuumer_pg_class_stats # Start with 20 tuples + dml_insert + dml_delete + pinholder_cursor + dml_insert + vacuumer_nonaggressive_vacuum + # reltuples is 21 here again -- "missed dead" tuple ("recently dead" when + # concurrent activity held back VACUUM's OldestXmin) won't be included in + # count here: + vacuumer_pg_class_stats + pinholder_commit # order doesn't matter + +# Test VACUUM's mechanism for skipping MultiXact freezing. +# +# This provides test coverage for code paths that are only hit when we need to +# freeze, but inability to acquire a cleanup lock on a heap page makes +# freezing some XIDs/XMIDs < FreezeLimit/MultiXactCutoff impossible (without +# waiting for a cleanup lock, which non-aggressive VACUUM is unwilling to do). +permutation + dml_begin + dml_other_begin + dml_key_share + dml_other_key_share + # Will get cleanup lock, can't advance relminmxid yet: + # (though will usually advance relfrozenxid by ~2 XIDs) + vacuumer_nonaggressive_vacuum + pinholder_cursor + dml_other_update + dml_commit + dml_other_commit + # Can't cleanup lock, so still can't advance relminmxid here: + # (relfrozenxid held back by XIDs in MultiXact too) + vacuumer_nonaggressive_vacuum + pinholder_commit + # Pin was dropped, so will advance relminmxid, at long last: + # (ditto for relfrozenxid advancement) + vacuumer_nonaggressive_vacuum diff --git a/src/test/isolation/specs/vacuum-reltuples.spec b/src/test/isolation/specs/vacuum-reltuples.spec deleted file mode 100644 index a2a461f2f5..0000000000 --- a/src/test/isolation/specs/vacuum-reltuples.spec +++ /dev/null @@ -1,49 +0,0 @@ -# Test for vacuum's handling of reltuples when pages are skipped due -# to page pins. We absolutely need to avoid setting reltuples=0 in -# such cases, since that interferes badly with planning. -# -# Expected result for all three permutation is 21 tuples, including -# the second permutation. VACUUM is able to count the concurrently -# inserted tuple in its final reltuples, even when a cleanup lock -# cannot be acquired on the affected heap page. 
- -setup { - create table smalltbl - as select i as id from generate_series(1,20) i; - alter table smalltbl set (autovacuum_enabled = off); -} -setup { - vacuum analyze smalltbl; -} - -teardown { - drop table smalltbl; -} - -session worker -step open { - begin; - declare c1 cursor for select 1 as dummy from smalltbl; -} -step fetch1 { - fetch next from c1; -} -step close { - commit; -} -step stats { - select relpages, reltuples from pg_class - where oid='smalltbl'::regclass; -} - -session vacuumer -step vac { - vacuum smalltbl; -} -step modify { - insert into smalltbl select max(id)+1 from smalltbl; -} - -permutation modify vac stats -permutation modify open fetch1 vac close stats -permutation modify vac stats
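
Editor's closing illustration (not part of the patch): the central mechanism
here is to initialize NewRelfrozenXid to OldestXmin and ratchet it back toward
the oldest XID that VACUUM leaves unfrozen. A minimal self-contained sketch,
with stand-in types and a made-up scan loop:

	#include <stdint.h>
	#include <stdio.h>

	typedef uint32_t TransactionId;

	/* Stand-in for PostgreSQL's wraparound-aware TransactionIdPrecedes() */
	static int
	xid_precedes(TransactionId a, TransactionId b)
	{
		return (int32_t) (a - b) < 0;
	}

	int
	main(void)
	{
		TransactionId OldestXmin = 1000;	/* upper bound on the final value */
		TransactionId NewRelfrozenXid = OldestXmin;
		TransactionId unfrozen[] = {990, 1005, 950, 999};	/* XIDs left behind */

		for (int i = 0; i < 4; i++)
		{
			/* Push back NewRelfrozenXid whenever an older unfrozen XID remains */
			if (xid_precedes(unfrozen[i], NewRelfrozenXid))
				NewRelfrozenXid = unfrozen[i];
		}

		/* Safe to store in pg_class: no remaining XID precedes it */
		printf("NewRelfrozenXid = %u\n", NewRelfrozenXid);	/* prints 950 */
		return 0;
	}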