From 3e701a04febb0c423d36ce0c47721a4c276439a2 Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Thu, 13 Mar 2008 18:00:32 +0000
Subject: [PATCH] Fix heap_page_prune's problem with failing to send cache
 invalidation messages if the calling transaction aborts later on.
 Collapsing out line pointer redirects is a done deal as soon as we complete
 the page update, so syscache *must* be notified even if the VACUUM FULL as
 a whole doesn't complete. To fix, add some functionality to inval.c to
 allow the pending inval messages to be sent immediately while
 heap_page_prune is still running. The implementation is a bit chintzy: it
 will only work in the context of VACUUM FULL. But that's all we need now,
 and it can always be extended later if needed. Per my trouble report of a
 week ago.
---
 src/backend/access/heap/pruneheap.c | 37 +++++++----
 src/backend/utils/cache/inval.c     | 95 ++++++++++++++++++++++++++++-
 src/include/utils/inval.h           |  6 +-
 3 files changed, 124 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 09c55ee761..3db5d0b82e 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.7 2008/03/08 21:57:59 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.8 2008/03/13 18:00:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -158,7 +158,14 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
 	 * much logic as possible out of the critical section, and also ensures
 	 * that WAL replay will work the same as the normal case.
 	 *
-	 * First, initialize the new pd_prune_xid value to zero (indicating no
+	 * First, inform inval.c that upcoming CacheInvalidateHeapTuple calls
+	 * are nontransactional.
+	 */
+	if (redirect_move)
+		BeginNonTransactionalInvalidation();
+
+	/*
+	 * Initialize the new pd_prune_xid value to zero (indicating no
 	 * prunable tuples). If we find any tuples which may soon become
 	 * prunable, we will save the lowest relevant XID in new_prune_xid.
 	 * Also initialize the rest of our working state.
@@ -191,6 +198,18 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
 							 redirect_move);
 	}
 
+	/*
+	 * Send invalidation messages for any tuples we are about to move.
+	 * It is safe to do this now, even though we could theoretically still
+	 * fail before making the actual page update, because a useless cache
+	 * invalidation doesn't hurt anything. Also, no one else can reload the
+	 * tuples while we have exclusive buffer lock, so it's not too early to
+	 * send the invals. This avoids sending the invals while inside the
+	 * critical section, which is a good thing for robustness.
+	 */
+	if (redirect_move)
+		EndNonTransactionalInvalidation();
+
 	/* Any error while applying the changes is critical */
 	START_CRIT_SECTION();
 
@@ -587,16 +606,10 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
 		/*
 		 * If we are going to implement a redirect by moving tuples, we have
 		 * to issue a cache invalidation against the redirection target tuple,
-		 * because its CTID will be effectively changed by the move. It is
-		 * safe to do this now, even though we might fail before reaching the
-		 * actual page update, because a useless cache invalidation doesn't
-		 * hurt anything. Furthermore we really really don't want to issue
-		 * the inval inside the critical section, since it has a nonzero
-		 * chance of causing an error (eg, due to running out of memory).
-		 *
-		 * XXX this is not really right because CacheInvalidateHeapTuple
-		 * expects transactional semantics, and our move isn't transactional.
-		 * FIXME!!
+		 * because its CTID will be effectively changed by the move. Note that
+		 * CacheInvalidateHeapTuple only queues the request, it doesn't send it;
+		 * if we fail before reaching EndNonTransactionalInvalidation, nothing
+		 * happens and no harm is done.
 		 */
 		if (OffsetNumberIsValid(redirect_target))
 		{
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 033ae3a2d6..ccb002f0f5 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -80,7 +80,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.83 2008/01/01 19:45:53 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.84 2008/03/13 18:00:32 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -964,6 +964,99 @@ CommandEndInvalidationMessages(void)
 								&transInvalInfo->CurrentCmdInvalidMsgs);
 }
 
+
+/*
+ * BeginNonTransactionalInvalidation
+ *		Prepare for invalidation messages for nontransactional updates.
+ *
+ * A nontransactional invalidation is one that must be sent whether or not
+ * the current transaction eventually commits. We arrange for all invals
+ * queued between this call and EndNonTransactionalInvalidation() to be sent
+ * immediately when the latter is called.
+ *
+ * Currently, this is only used by heap_page_prune(), and only when it is
+ * invoked during VACUUM FULL's first pass over a table. We expect therefore
+ * that we are not inside a subtransaction and there are no already-pending
+ * invalidations. This could be relaxed by setting up a new nesting level of
+ * invalidation data, but for now there's no need. Note that heap_page_prune
+ * knows that this function does not change any state, and therefore there's
+ * no need to worry about cleaning up if there's an elog(ERROR) before
+ * reaching EndNonTransactionalInvalidation (the invals will just be thrown
+ * away if that happens).
+ */
+void
+BeginNonTransactionalInvalidation(void)
+{
+	/* Must be at top of stack */
+	Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
+
+	/* Must not have any previously-queued activity */
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
+	Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
+	Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL);
+	Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL);
+	Assert(transInvalInfo->RelcacheInitFileInval == false);
+}
+
+/*
+ * EndNonTransactionalInvalidation
+ *		Process queued-up invalidation messages for nontransactional updates.
+ *
+ * We expect to find messages in CurrentCmdInvalidMsgs only (else there
+ * was a CommandCounterIncrement within the "nontransactional" update).
+ * We must process them locally and send them out to the shared invalidation
+ * message queue.
+ *
+ * We must also reset the lists to empty and explicitly free memory (we can't
+ * rely on end-of-transaction cleanup for that).
+ */ +void +EndNonTransactionalInvalidation(void) +{ + InvalidationChunk *chunk; + InvalidationChunk *next; + + /* Must be at top of stack */ + Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL); + + /* Must not have any prior-command messages */ + Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL); + Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL); + + /* + * At present, this function is only used for CTID-changing updates; + * since the relcache init file doesn't store any tuple CTIDs, we + * don't have to invalidate it. That might not be true forever + * though, in which case we'd need code similar to AtEOXact_Inval. + */ + + /* Send out the invals */ + ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, + LocalExecuteInvalidationMessage); + ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs, + SendSharedInvalidMessage); + + /* Clean up and release memory */ + for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.cclist; + chunk != NULL; + chunk = next) + { + next = chunk->next; + pfree(chunk); + } + for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.rclist; + chunk != NULL; + chunk = next) + { + next = chunk->next; + pfree(chunk); + } + transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL; + transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL; + transInvalInfo->RelcacheInitFileInval = false; +} + + /* * CacheInvalidateHeapTuple * Register the given tuple for invalidation at end of command diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index df0e5f397b..80c8c2ab33 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.41 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.42 2008/03/13 18:00:32 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,10 @@ extern void PostPrepare_Inval(void); extern void CommandEndInvalidationMessages(void); +extern void BeginNonTransactionalInvalidation(void); + +extern void EndNonTransactionalInvalidation(void); + extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple); extern void CacheInvalidateRelcache(Relation relation);
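
A usage note for reviewers: the calling discipline the two new inval.c entry
points impose is easiest to see in a stripped-down sketch. The code below is
illustrative only, not part of the patch; prune_page_with_moves and the
moved[]/nmoved tuple array are invented names, standing in for the
redirect_move path of heap_page_prune() above.

#include "postgres.h"

#include "access/htup.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "utils/inval.h"
#include "utils/rel.h"

/*
 * Sketch of the bracketing required around nontransactional
 * CacheInvalidateHeapTuple() calls.  The function name and the "moved"
 * tuple array are invented for illustration; the real caller is the
 * redirect_move path in heap_page_prune().
 */
static void
prune_page_with_moves(Relation relation, Buffer buffer,
					  HeapTuple *moved, int nmoved)
{
	int			i;

	/*
	 * Tell inval.c that the invals queued from here on must be sent even
	 * if the surrounding transaction later aborts.  Asserts that nothing
	 * is queued already; valid only in the VACUUM FULL context.
	 */
	BeginNonTransactionalInvalidation();

	/*
	 * Queue an inval for each tuple whose CTID is about to change.  The
	 * messages are only queued, not sent, so an error raised anywhere in
	 * this loop leaves no trace.
	 */
	for (i = 0; i < nmoved; i++)
		CacheInvalidateHeapTuple(relation, moved[i]);

	/*
	 * Send the queued invals, locally and to the shared queue, before
	 * entering the critical section.  A useless inval is harmless if we
	 * fail later, whereas an error while sending from inside the critical
	 * section (e.g. out of memory) would escalate to a PANIC.
	 */
	EndNonTransactionalInvalidation();

	/* Any failure beyond this point must be treated as critical */
	START_CRIT_SECTION();
	/* ... apply the page update ... */
	MarkBufferDirty(buffer);
	/* ... emit the WAL record for the prune ... */
	END_CRIT_SECTION();
}

The ordering is the point of the exercise: the queued invals are flushed
while the exclusive buffer lock is still held, so no other backend can reload
the about-to-be-moved tuples before the page is actually updated, and nothing
is ever sent from inside the critical section.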