Fix possible crash during FATAL exit from reindexing.

index.c assumed that it could just use a PG_TRY block to clean up the
state associated with an active REINDEX operation.  However, that cleanup
code doesn't run if we do a FATAL exit --- for example, due to a SIGTERM
shutdown signal --- while the REINDEX is happening.  And that state does
get consulted during catalog accesses, which makes it problematic if we
do any catalog accesses during shutdown --- for example, to clean up any
temp tables created in the session.

If this combination of circumstances occurred, we could find ourselves
trying to access already-freed memory.  In debug builds that'd fairly
reliably cause an assertion failure.  In production we might often
get away with it, but with some bad luck it could cause a core dump.

Another possible bad outcome is an erroneous conclusion that an
index-to-be-accessed is being reindexed; but it looks like that would
be unlikely to have any consequences worse than failing to drop temp
tables right away.  (They'd still get dropped by the next session that
uses that temp schema.)

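To fix, get rid of the use of PG_TRY here, and instead hook into
the transaction abort mechanisms to clean up reindex state:
AbortTransaction() and AbortSubTransaction() now call a new function,
ResetReindexState(), which clears the state if the REINDEX was started
at the aborting transaction nesting level or a deeper one.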

Per bug #16378 from Alexander Lakhin.  This has been wrong for a
very long time, so back-patch to all supported branches.

Discussion: https://postgr.es/m/16378-7a70ca41b3ec2009@postgresql.org
This commit is contained in:
Tom Lane 2020-04-21 15:58:42 -04:00
parent 90abbba074
commit 5a4efd100a
3 changed files with 108 additions and 99 deletions

View File

@ -30,6 +30,7 @@
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "access/xlogutils.h"
#include "catalog/index.h"
#include "catalog/namespace.h"
#include "catalog/pg_enum.h"
#include "catalog/storage.h"
@ -2646,6 +2647,9 @@ AbortTransaction(void)
*/
SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
/* Forget about any active REINDEX. */
ResetReindexState(s->nestingLevel);
/* If in parallel mode, clean up workers and exit parallel mode. */
if (IsInParallelMode())
{
@ -4946,6 +4950,9 @@ AbortSubTransaction(void)
*/
SetUserIdAndSecContext(s->prevUser, s->prevSecContext);
/* Forget about any active REINDEX. */
ResetReindexState(s->nestingLevel);
/* Exit from parallel mode, if necessary. */
if (IsInParallelMode())
{

View File

@ -130,7 +130,6 @@ static void SetReindexProcessing(Oid heapOid, Oid indexOid);
static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
static void ResetReindexPending(void);
/*
@ -1532,8 +1531,8 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
newIndexForm->indisclustered = oldIndexForm->indisclustered;
/*
* Mark the new index as valid, and the old index as invalid similarly
* to what index_set_state_flags() does.
* Mark the new index as valid, and the old index as invalid similarly to
* what index_set_state_flags() does.
*/
newIndexForm->indisvalid = true;
oldIndexForm->indisvalid = false;
@ -3534,26 +3533,17 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
indexInfo->ii_ExclusionStrats = NULL;
}
/* ensure SetReindexProcessing state isn't leaked */
PG_TRY();
{
/* Suppress use of the target index while rebuilding it */
SetReindexProcessing(heapId, indexId);
/* Suppress use of the target index while rebuilding it */
SetReindexProcessing(heapId, indexId);
/* Create a new physical relation for the index */
RelationSetNewRelfilenode(iRel, persistence);
/* Create a new physical relation for the index */
RelationSetNewRelfilenode(iRel, persistence);
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
index_build(heapRelation, iRel, indexInfo, true, true);
}
PG_CATCH();
{
/* Make sure flag gets cleared on error exit */
ResetReindexProcessing();
PG_RE_THROW();
}
PG_END_TRY();
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
index_build(heapRelation, iRel, indexInfo, true, true);
/* Re-allow use of target index */
ResetReindexProcessing();
/*
@ -3691,7 +3681,9 @@ reindex_relation(Oid relid, int flags, int options)
Relation rel;
Oid toast_relid;
List *indexIds;
char persistence;
bool result;
ListCell *indexId;
int i;
/*
@ -3726,79 +3718,65 @@ reindex_relation(Oid relid, int flags, int options)
*/
indexIds = RelationGetIndexList(rel);
PG_TRY();
if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
{
ListCell *indexId;
char persistence;
if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
{
/* Suppress use of all the indexes until they are rebuilt */
SetReindexPending(indexIds);
/*
* Make the new heap contents visible --- now things might be
* inconsistent!
*/
CommandCounterIncrement();
}
/* Suppress use of all the indexes until they are rebuilt */
SetReindexPending(indexIds);
/*
* Compute persistence of indexes: same as that of owning rel, unless
* caller specified otherwise.
* Make the new heap contents visible --- now things might be
* inconsistent!
*/
if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
persistence = RELPERSISTENCE_UNLOGGED;
else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
persistence = RELPERSISTENCE_PERMANENT;
else
persistence = rel->rd_rel->relpersistence;
/* Reindex all the indexes. */
i = 1;
foreach(indexId, indexIds)
{
Oid indexOid = lfirst_oid(indexId);
Oid indexNamespaceId = get_rel_namespace(indexOid);
/*
* Skip any invalid indexes on a TOAST table. These can only be
* duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
* rebuilt it would not be possible to drop them anymore.
*/
if (IsToastNamespace(indexNamespaceId) &&
!get_index_isvalid(indexOid))
{
ereport(WARNING,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
get_namespace_name(indexNamespaceId),
get_rel_name(indexOid))));
continue;
}
reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
persistence, options);
CommandCounterIncrement();
/* Index should no longer be in the pending list */
Assert(!ReindexIsProcessingIndex(indexOid));
/* Set index rebuild count */
pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
i);
i++;
}
CommandCounterIncrement();
}
PG_CATCH();
/*
* Compute persistence of indexes: same as that of owning rel, unless
* caller specified otherwise.
*/
if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
persistence = RELPERSISTENCE_UNLOGGED;
else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
persistence = RELPERSISTENCE_PERMANENT;
else
persistence = rel->rd_rel->relpersistence;
/* Reindex all the indexes. */
i = 1;
foreach(indexId, indexIds)
{
/* Make sure list gets cleared on error exit */
ResetReindexPending();
PG_RE_THROW();
Oid indexOid = lfirst_oid(indexId);
Oid indexNamespaceId = get_rel_namespace(indexOid);
/*
* Skip any invalid indexes on a TOAST table. These can only be
* duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
* rebuilt it would not be possible to drop them anymore.
*/
if (IsToastNamespace(indexNamespaceId) &&
!get_index_isvalid(indexOid))
{
ereport(WARNING,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
get_namespace_name(indexNamespaceId),
get_rel_name(indexOid))));
continue;
}
reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
persistence, options);
CommandCounterIncrement();
/* Index should no longer be in the pending list */
Assert(!ReindexIsProcessingIndex(indexOid));
/* Set index rebuild count */
pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
i);
i++;
}
PG_END_TRY();
ResetReindexPending();
/*
* Close rel, but continue to hold the lock.
@ -3832,6 +3810,7 @@ reindex_relation(Oid relid, int flags, int options)
static Oid currentlyReindexedHeap = InvalidOid;
static Oid currentlyReindexedIndex = InvalidOid;
static List *pendingReindexedIndexes = NIL;
static int reindexingNestLevel = 0;
/*
* ReindexIsProcessingHeap
@ -3868,8 +3847,6 @@ ReindexIsProcessingIndex(Oid indexOid)
/*
* SetReindexProcessing
* Set flag that specified heap/index are being reindexed.
*
* NB: caller must use a PG_TRY block to ensure ResetReindexProcessing is done.
*/
static void
SetReindexProcessing(Oid heapOid, Oid indexOid)
@ -3882,6 +3859,8 @@ SetReindexProcessing(Oid heapOid, Oid indexOid)
currentlyReindexedIndex = indexOid;
/* Index is no longer "pending" reindex. */
RemoveReindexPending(indexOid);
/* This may have been set already, but in case it isn't, do so now. */
reindexingNestLevel = GetCurrentTransactionNestLevel();
}
/*
@ -3891,17 +3870,16 @@ SetReindexProcessing(Oid heapOid, Oid indexOid)
static void
ResetReindexProcessing(void)
{
/*
 * Forget which heap/index pair is being reindexed by resetting both
 * tracking variables to InvalidOid.
 */
/* This may be called in leader error path */
currentlyReindexedHeap = InvalidOid;
currentlyReindexedIndex = InvalidOid;
/*
 * reindexingNestLevel remains set till end of (sub)transaction; this
 * function deliberately leaves it untouched.
 */
}
/*
* SetReindexPending
* Mark the given indexes as pending reindex.
*
* NB: caller must use a PG_TRY block to ensure ResetReindexPending is done.
* Also, we assume that the current memory context stays valid throughout.
* NB: we assume that the current memory context stays valid throughout.
*/
static void
SetReindexPending(List *indexes)
@ -3912,6 +3890,7 @@ SetReindexPending(List *indexes)
if (IsInParallelMode())
elog(ERROR, "cannot modify reindex state during a parallel operation");
pendingReindexedIndexes = list_copy(indexes);
reindexingNestLevel = GetCurrentTransactionNestLevel();
}
/*
@ -3928,14 +3907,32 @@ RemoveReindexPending(Oid indexOid)
}
/*
* ResetReindexPending
* Unset reindex-pending status.
* ResetReindexState
* Clear all reindexing state during (sub)transaction abort.
*/
static void
ResetReindexPending(void)
void
ResetReindexState(int nestLevel)
{
/* This may be called in leader error path */
pendingReindexedIndexes = NIL;
/*
* Because reindexing is not re-entrant, we don't need to cope with nested
* reindexing states. We just need to avoid messing up the outer-level
* state in case a subtransaction fails within a REINDEX. So checking the
* current nest level against that of the reindex operation is sufficient.
*/
if (reindexingNestLevel >= nestLevel)
{
currentlyReindexedHeap = InvalidOid;
currentlyReindexedIndex = InvalidOid;
/*
* We needn't try to release the contents of pendingReindexedIndexes;
* that list should be in a transaction-lifespan context, so it will
* go away automatically.
*/
pendingReindexedIndexes = NIL;
reindexingNestLevel = 0;
}
}
/*
@ -3988,4 +3985,7 @@ RestoreReindexState(void *reindexstate)
lappend_oid(pendingReindexedIndexes,
sistate->pendingReindexedIndexes[c]);
MemoryContextSwitchTo(oldcontext);
/* Note the worker has its own transaction nesting level */
reindexingNestLevel = GetCurrentTransactionNestLevel();
}

View File

@ -131,6 +131,8 @@ extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action);
extern Oid IndexGetRelation(Oid indexId, bool missing_ok);
extern void reindex_index(Oid indexId, bool skip_constraint_checks,
char relpersistence, int options);
@ -145,8 +147,8 @@ extern bool reindex_relation(Oid relid, int flags, int options);
extern bool ReindexIsProcessingHeap(Oid heapOid);
extern bool ReindexIsProcessingIndex(Oid indexOid);
extern Oid IndexGetRelation(Oid indexId, bool missing_ok);
extern void ResetReindexState(int nestLevel);
extern Size EstimateReindexStateSpace(void);
extern void SerializeReindexState(Size maxsize, char *start_address);
extern void RestoreReindexState(void *reindexstate);