diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index f92eef98e8..35a7c6147e 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -51,20 +51,22 @@ typedef struct * 1. Otherwise every access would need to subtract 1. */ bool marked[MaxHeapTuplesPerPage + 1]; + + int ndeleted; /* Number of tuples deleted from the page */ } PruneState; /* Local functions */ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer); -static int heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, +static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, - OffsetNumber offnum, OffsetNumber rdoffnum); -static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum); -static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum); -static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); + OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal); +static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal); +static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal); +static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal); static void page_verify_redirects(Page page); @@ -241,6 +243,7 @@ heap_page_prune(Relation relation, Buffer buffer, prstate.snapshotConflictHorizon = InvalidTransactionId; prstate.nredirected = prstate.ndead = prstate.nunused = 0; memset(prstate.marked, 0, sizeof(prstate.marked)); + prstate.ndeleted = 0; /* * presult->htsv is not initialized here because all ntuple spots in the @@ -321,8 +324,8 @@ heap_page_prune(Relation relation, Buffer buffer, continue; /* Process this item or chain of items */ - presult->ndeleted += heap_prune_chain(page, blockno, maxoff, offnum, - presult->htsv, &prstate); + heap_prune_chain(page, blockno, maxoff, + offnum, presult->htsv, &prstate); } /* Clear the offset information once we have processed the given page. */ @@ -394,8 +397,9 @@ heap_page_prune(Relation relation, Buffer buffer, END_CRIT_SECTION(); - /* Record number of newly-set-LP_DEAD items for caller */ + /* Copy information back for caller */ presult->nnewlpdead = prstate.ndead; + presult->ndeleted = prstate.ndeleted; } @@ -444,22 +448,23 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer) * to the redirected[] array (two entries per redirection); items to be set to * LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED * state are added to nowunused[]. - * - * Returns the number of tuples (to be) deleted from the page. */ -static int +static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate) { - int ndeleted = 0; TransactionId priorXmax = InvalidTransactionId; ItemId rootlp; HeapTupleHeader htup; - OffsetNumber latestdead = InvalidOffsetNumber, - offnum; + OffsetNumber offnum; OffsetNumber chainitems[MaxHeapTuplesPerPage]; - int nchain = 0, - i; + + /* + * After traversing the HOT chain, ndeadchain is the index in chainitems + * of the first live successor after the last dead item. + */ + int ndeadchain = 0, + nchain = 0; rootlp = PageGetItemId(page, rootoffnum); @@ -494,14 +499,12 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, if (htsv[rootoffnum] == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { - heap_prune_record_unused(prstate, rootoffnum); + heap_prune_record_unused(prstate, rootoffnum, true); HeapTupleHeaderAdvanceConflictHorizon(htup, &prstate->snapshotConflictHorizon); - ndeleted++; } - /* Nothing more to do */ - return ndeleted; + return; } } @@ -512,8 +515,6 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, for (;;) { ItemId lp; - bool tupdead, - recent_dead; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -563,7 +564,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, * the LP was already marked dead. */ if (unlikely(prstate->mark_unused_now)) - heap_prune_record_unused(prstate, offnum); + heap_prune_record_unused(prstate, offnum, false); break; } @@ -586,23 +587,41 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, /* * Check tuple's visibility status. */ - tupdead = recent_dead = false; - switch (htsv_get_valid_status(htsv[offnum])) { case HEAPTUPLE_DEAD: - tupdead = true; + + /* Remember the last DEAD tuple seen */ + ndeadchain = nchain; + HeapTupleHeaderAdvanceConflictHorizon(htup, + &prstate->snapshotConflictHorizon); + + /* Advance to next chain member */ break; case HEAPTUPLE_RECENTLY_DEAD: - recent_dead = true; /* * This tuple may soon become DEAD. Update the hint field so * that the page is reconsidered for pruning in future. + * + * We don't need to advance the conflict horizon for + * RECENTLY_DEAD tuples, even if we are removing them. This + * is because we only remove RECENTLY_DEAD tuples if they + * precede a DEAD tuple, and the DEAD tuple must have been + * inserted by a newer transaction than the RECENTLY_DEAD + * tuple by virtue of being later in the chain. We will have + * advanced the conflict horizon for the DEAD tuple. */ heap_prune_record_prunable(prstate, HeapTupleHeaderGetUpdateXid(htup)); + + /* + * Advance past RECENTLY_DEAD tuples just in case there's a + * DEAD one after them. We have to make sure that we don't + * miss any DEAD tuples, since DEAD tuples that still have + * tuple storage after pruning will confuse VACUUM. + */ break; case HEAPTUPLE_DELETE_IN_PROGRESS: @@ -613,7 +632,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, */ heap_prune_record_prunable(prstate, HeapTupleHeaderGetUpdateXid(htup)); - break; + goto process_chain; case HEAPTUPLE_LIVE: case HEAPTUPLE_INSERT_IN_PROGRESS: @@ -624,35 +643,19 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, * But we don't. See related decisions about when to mark the * page prunable in heapam.c. */ - break; + goto process_chain; default: elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); - break; + goto process_chain; } - /* - * Remember the last DEAD tuple seen. We will advance past - * RECENTLY_DEAD tuples just in case there's a DEAD one after them; - * but we can't advance past anything else. We have to make sure that - * we don't miss any DEAD tuples, since DEAD tuples that still have - * tuple storage after pruning will confuse VACUUM. - */ - if (tupdead) - { - latestdead = offnum; - HeapTupleHeaderAdvanceConflictHorizon(htup, - &prstate->snapshotConflictHorizon); - } - else if (!recent_dead) - break; - /* * If the tuple is not HOT-updated, then we are at the end of this * HOT-update chain. */ if (!HeapTupleHeaderIsHotUpdated(htup)) - break; + goto process_chain; /* HOT implies it can't have moved to different partition */ Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup)); @@ -665,57 +668,52 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, priorXmax = HeapTupleHeaderGetUpdateXid(htup); } - /* - * If we found a DEAD tuple in the chain, adjust the HOT chain so that all - * the DEAD tuples at the start of the chain are removed and the root line - * pointer is appropriately redirected. - */ - if (OffsetNumberIsValid(latestdead)) - { - /* - * Mark as unused each intermediate item that we are able to remove - * from the chain. - * - * When the previous item is the last dead tuple seen, we are at the - * right candidate for redirection. - */ - for (i = 1; (i < nchain) && (chainitems[i - 1] != latestdead); i++) - { - heap_prune_record_unused(prstate, chainitems[i]); - ndeleted++; - } - - /* - * If the root entry had been a normal tuple, we are deleting it, so - * count it in the result. But changing a redirect (even to DEAD - * state) doesn't count. - */ - if (ItemIdIsNormal(rootlp)) - ndeleted++; - - /* - * If the DEAD tuple is at the end of the chain, the entire chain is - * dead and the root line pointer can be marked dead. Otherwise just - * redirect the root to the correct chain member. - */ - if (i >= nchain) - heap_prune_record_dead_or_unused(prstate, rootoffnum); - else - heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]); - } - else if (nchain < 2 && ItemIdIsRedirected(rootlp)) + if (ItemIdIsRedirected(rootlp) && nchain < 2) { /* * We found a redirect item that doesn't point to a valid follow-on * item. This can happen if the loop in heap_page_prune caused us to * visit the dead successor of a redirect item before visiting the * redirect item. We can clean up by setting the redirect item to - * DEAD state or LP_UNUSED if the caller indicated. + * LP_DEAD state or LP_UNUSED if the caller indicated. */ - heap_prune_record_dead_or_unused(prstate, rootoffnum); + heap_prune_record_dead_or_unused(prstate, rootoffnum, false); + return; } - return ndeleted; +process_chain: + + if (ndeadchain == 0) + { + /* + * No DEAD tuple was found, so the chain is entirely composed of + * normal, unchanged tuples. Leave it alone. + */ + } + else if (ndeadchain == nchain) + { + /* + * The entire chain is dead. Mark the root line pointer LP_DEAD, and + * fully remove the other tuples in the chain. + */ + heap_prune_record_dead_or_unused(prstate, rootoffnum, ItemIdIsNormal(rootlp)); + for (int i = 1; i < nchain; i++) + heap_prune_record_unused(prstate, chainitems[i], true); + } + else + { + /* + * We found a DEAD tuple in the chain. Redirect the root line pointer + * to the first non-DEAD tuple, and mark as unused each intermediate + * item that we are able to remove from the chain. + */ + heap_prune_record_redirect(prstate, rootoffnum, chainitems[ndeadchain], + ItemIdIsNormal(rootlp)); + for (int i = 1; i < ndeadchain; i++) + heap_prune_record_unused(prstate, chainitems[i], true); + + /* the rest of tuples in the chain are normal, unchanged tuples */ + } } /* Record lowest soon-prunable XID */ @@ -735,7 +733,8 @@ heap_prune_record_prunable(PruneState *prstate, TransactionId xid) /* Record line pointer to be redirected */ static void heap_prune_record_redirect(PruneState *prstate, - OffsetNumber offnum, OffsetNumber rdoffnum) + OffsetNumber offnum, OffsetNumber rdoffnum, + bool was_normal) { Assert(prstate->nredirected < MaxHeapTuplesPerPage); prstate->redirected[prstate->nredirected * 2] = offnum; @@ -745,17 +744,34 @@ heap_prune_record_redirect(PruneState *prstate, prstate->marked[offnum] = true; Assert(!prstate->marked[rdoffnum]); prstate->marked[rdoffnum] = true; + + /* + * If the root entry had been a normal tuple, we are deleting it, so count + * it in the result. But changing a redirect (even to DEAD state) doesn't + * count. + */ + if (was_normal) + prstate->ndeleted++; } /* Record line pointer to be marked dead */ static void -heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum) +heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, + bool was_normal) { Assert(prstate->ndead < MaxHeapTuplesPerPage); prstate->nowdead[prstate->ndead] = offnum; prstate->ndead++; Assert(!prstate->marked[offnum]); prstate->marked[offnum] = true; + + /* + * If the root entry had been a normal tuple, we are deleting it, so count + * it in the result. But changing a redirect (even to DEAD state) doesn't + * count. + */ + if (was_normal) + prstate->ndeleted++; } /* @@ -765,7 +781,8 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum) * pointers LP_DEAD if mark_unused_now is true. */ static void -heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum) +heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, + bool was_normal) { /* * If the caller set mark_unused_now to true, we can remove dead tuples @@ -774,20 +791,28 @@ heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum) * likely. */ if (unlikely(prstate->mark_unused_now)) - heap_prune_record_unused(prstate, offnum); + heap_prune_record_unused(prstate, offnum, was_normal); else - heap_prune_record_dead(prstate, offnum); + heap_prune_record_dead(prstate, offnum, was_normal); } /* Record line pointer to be marked unused */ static void -heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum) +heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal) { Assert(prstate->nunused < MaxHeapTuplesPerPage); prstate->nowunused[prstate->nunused] = offnum; prstate->nunused++; Assert(!prstate->marked[offnum]); prstate->marked[offnum] = true; + + /* + * If the root entry had been a normal tuple, we are deleting it, so count + * it in the result. But changing a redirect (even to DEAD state) doesn't + * count. + */ + if (was_normal) + prstate->ndeleted++; }