Cope with catcache entries becoming stale during detoasting.

We've long had a policy that any toasted fields in a catalog tuple
should be pulled in-line before entering the tuple in a catalog cache.
However, that requires access to the catalog's toast table, and we'll
typically do AcceptInvalidationMessages while opening the toast table.
So it's possible that the catalog tuple is outdated by the time we
finish detoasting it.  Since no cache entry exists yet, we can't
mark the entry stale during AcceptInvalidationMessages, and instead
we'll press forward and build an apparently-valid cache entry.  The
upshot is that we have a race condition whereby an out-of-date entry
could be made in a backend's catalog cache, and persist there
indefinitely, causing indeterminate misbehavior.
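
For concreteness, the pre-patch shape of this code path in
CatalogCacheCreateEntry looks roughly like the sketch below (condensed,
not verbatim; the unchanged lines are visible in the diff further down).
The toast_flatten_tuple() call is where the toast table gets opened and
hence where AcceptInvalidationMessages() can run:

    /* pre-patch sketch: detoast, then build the entry, with no recheck */
    if (HeapTupleHasExternal(ntp))
        dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
    else
        dtp = ntp;
    /* dtp is then copied into a new CatCTup and entered into the cache,
     * even if the row was updated while we were busy detoasting */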

To fix, use the existing systable_recheck_tuple code to recheck
whether the catalog tuple is still up-to-date after we finish
detoasting it.  If not, loop around and restart the process of
searching the catalog and constructing cache entries from the top.
The case is rare enough that this shouldn't create any meaningful
performance penalty, even in the SearchCatCacheList case where
we need to tear down and reconstruct the whole list.
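
In outline (a condensed sketch of the new code shown in the diff below;
refcount bookkeeping, the negative-entry path, and SearchCatCacheList's
extra cleanup are omitted), CatalogCacheCreateEntry reports staleness by
returning NULL, and its callers loop until they get through a scan
without seeing that:

    /* in CatalogCacheCreateEntry: */
    if (HeapTupleHasExternal(ntp))
    {
        dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
        /* the row may have been invalidated during toast-table access */
        if (!systable_recheck_tuple(scandesc, ntp))
        {
            heap_freetuple(dtp);
            return NULL;            /* tell the caller to retry */
        }
    }

    /* in SearchCatCacheMiss: */
    do
    {
        /* re-copy the scankey each time: systable_beginscan scribbles on it */
        memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * nkeys);
        /* ... fill cur_skey[i].sk_argument from the lookup keys ... */
        scandesc = systable_beginscan(relation, cache->cc_indexoid,
                                      IndexScanOK(cache, cur_skey),
                                      NULL, nkeys, cur_skey);
        stale = false;
        while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
        {
            ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
                                         hashValue, hashIndex);
            if (ct == NULL)
            {
                stale = true;       /* went stale; redo the whole scan */
                break;
            }
            /* ... bump the refcount and stop at the first match ... */
        }
        systable_endscan(scandesc);
    } while (stale);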

Indeed, the case is so rare that AFAICT it doesn't occur during
our regression tests, and there doesn't seem to be any easy way
to build a test that would exercise it reliably.  To allow
testing of the retry code paths, add logic (in USE_ASSERT_CHECKING
builds only) that randomly pretends that the recheck failed about
one time out of a thousand.  This is enough to ensure that we'll
pass through the retry paths during most regression test runs.
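
The injection itself is just a few lines at the top of the detoasting
path in CatalogCacheCreateEntry (condensed from the diff below, with the
comment paraphrased):

    #ifdef USE_ASSERT_CHECKING
        /* pretend the recheck failed about 1 time in 1000, so that the
         * callers' retry paths get exercised in ordinary regression runs */
        if (pg_prng_uint32(&pg_global_prng_state) <= (PG_UINT32_MAX / 1000))
            return NULL;
    #endif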

By adding an extra level of looping, this commit creates a need
to reindent most of SearchCatCacheMiss and SearchCatCacheList.
I'll do that separately, to allow putting those changes in
.git-blame-ignore-revs.

Patch by me; thanks to Alexander Lakhin for having built a test
case to prove the bug is real, and to Xiaoran Wang for review.
Back-patch to all supported branches.

Discussion: https://postgr.es/m/1393953.1698353013@sss.pgh.pa.us
Discussion: https://postgr.es/m/CAGjhLkOoBEC9mLsnB42d3CO1vcMx71MLSEuigeABbQ8oRdA6gw@mail.gmail.com
Tom Lane 2024-01-13 13:46:27 -05:00
parent 4f622503d6
commit ad98fb1422
1 changed file with 124 additions and 40 deletions

@ -24,6 +24,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "common/pg_prng.h"
#include "miscadmin.h"
#include "port/pg_bitutils.h"
#ifdef CATCACHE_STATS
@ -90,10 +91,10 @@ static void CatCachePrintStats(int code, Datum arg);
static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct);
static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
static void CatalogCacheInitializeCache(CatCache *cache);
static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
HeapTuple ntp, SysScanDesc scandesc,
Datum *arguments,
uint32 hashValue, Index hashIndex,
bool negative);
uint32 hashValue, Index hashIndex);
static void ReleaseCatCacheWithOwner(HeapTuple tuple, ResourceOwner resowner);
static void ReleaseCatCacheListWithOwner(CatCList *list, ResourceOwner resowner);
@ -1372,6 +1373,7 @@ SearchCatCacheMiss(CatCache *cache,
SysScanDesc scandesc;
HeapTuple ntp;
CatCTup *ct;
bool stale;
Datum arguments[CATCACHE_MAXKEYS];
/* Initialize local parameter array */
@ -1380,16 +1382,6 @@ SearchCatCacheMiss(CatCache *cache,
arguments[2] = v3;
arguments[3] = v4;
/*
* Ok, need to make a lookup in the relation, copy the scankey and fill
* out any per-call fields.
*/
memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * nkeys);
cur_skey[0].sk_argument = v1;
cur_skey[1].sk_argument = v2;
cur_skey[2].sk_argument = v3;
cur_skey[3].sk_argument = v4;
/*
* Tuple was not found in cache, so we have to try to retrieve it directly
* from the relation. If found, we will add it to the cache; if not
@ -1404,9 +1396,28 @@ SearchCatCacheMiss(CatCache *cache,
* will eventually age out of the cache, so there's no functional problem.
* This case is rare enough that it's not worth expending extra cycles to
* detect.
*
* Another case, which we *must* handle, is that the tuple could become
* outdated during CatalogCacheCreateEntry's attempt to detoast it (since
* AcceptInvalidationMessages can run during TOAST table access). We do
* not want to return already-stale catcache entries, so we loop around
* and do the table scan again if that happens.
*/
relation = table_open(cache->cc_reloid, AccessShareLock);
do
{
/*
* Ok, need to make a lookup in the relation, copy the scankey and
* fill out any per-call fields. (We must re-do this when retrying,
* because systable_beginscan scribbles on the scankey.)
*/
memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * nkeys);
cur_skey[0].sk_argument = v1;
cur_skey[1].sk_argument = v2;
cur_skey[2].sk_argument = v3;
cur_skey[3].sk_argument = v4;
scandesc = systable_beginscan(relation,
cache->cc_indexoid,
IndexScanOK(cache, cur_skey),
@ -1415,12 +1426,18 @@ SearchCatCacheMiss(CatCache *cache,
cur_skey);
ct = NULL;
stale = false;
while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
{
ct = CatalogCacheCreateEntry(cache, ntp, arguments,
hashValue, hashIndex,
false);
ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
hashValue, hashIndex);
/* upon failure, we must start the scan over */
if (ct == NULL)
{
stale = true;
break;
}
/* immediately set the refcount to 1 */
ResourceOwnerEnlarge(CurrentResourceOwner);
ct->refcount++;
@ -1429,6 +1446,7 @@ SearchCatCacheMiss(CatCache *cache,
}
systable_endscan(scandesc);
} while (stale);
table_close(relation, AccessShareLock);
@ -1447,9 +1465,11 @@ SearchCatCacheMiss(CatCache *cache,
if (IsBootstrapProcessingMode())
return NULL;
ct = CatalogCacheCreateEntry(cache, NULL, arguments,
hashValue, hashIndex,
true);
ct = CatalogCacheCreateEntry(cache, NULL, NULL, arguments,
hashValue, hashIndex);
/* Creating a negative cache entry shouldn't fail */
Assert(ct != NULL);
CACHE_elog(DEBUG2, "SearchCatCache(%s): Contains %d/%d tuples",
cache->cc_relname, cache->cc_ntup, CacheHdr->ch_ntup);
@ -1663,7 +1683,8 @@ SearchCatCacheList(CatCache *cache,
* We have to bump the member refcounts temporarily to ensure they won't
* get dropped from the cache while loading other members. We use a PG_TRY
* block to ensure we can undo those refcounts if we get an error before
* we finish constructing the CatCList.
* we finish constructing the CatCList. ctlist must be valid throughout
* the PG_TRY block.
*/
ctlist = NIL;
@ -1672,19 +1693,23 @@ SearchCatCacheList(CatCache *cache,
ScanKeyData cur_skey[CATCACHE_MAXKEYS];
Relation relation;
SysScanDesc scandesc;
/*
* Ok, need to make a lookup in the relation, copy the scankey and
* fill out any per-call fields.
*/
memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * cache->cc_nkeys);
cur_skey[0].sk_argument = v1;
cur_skey[1].sk_argument = v2;
cur_skey[2].sk_argument = v3;
cur_skey[3].sk_argument = v4;
bool stale;
relation = table_open(cache->cc_reloid, AccessShareLock);
do
{
/*
* Ok, need to make a lookup in the relation, copy the scankey and
* fill out any per-call fields. (We must re-do this when
* retrying, because systable_beginscan scribbles on the scankey.)
*/
memcpy(cur_skey, cache->cc_skey, sizeof(ScanKeyData) * cache->cc_nkeys);
cur_skey[0].sk_argument = v1;
cur_skey[1].sk_argument = v2;
cur_skey[2].sk_argument = v3;
cur_skey[3].sk_argument = v4;
scandesc = systable_beginscan(relation,
cache->cc_indexoid,
IndexScanOK(cache, cur_skey),
@ -1695,6 +1720,8 @@ SearchCatCacheList(CatCache *cache,
/* The list will be ordered iff we are doing an index scan */
ordered = (scandesc->irel != NULL);
stale = false;
while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
{
uint32 hashValue;
@ -1737,9 +1764,32 @@ SearchCatCacheList(CatCache *cache,
if (!found)
{
/* We didn't find a usable entry, so make a new one */
ct = CatalogCacheCreateEntry(cache, ntp, arguments,
hashValue, hashIndex,
false);
ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
hashValue, hashIndex);
/* upon failure, we must start the scan over */
if (ct == NULL)
{
/*
* Release refcounts on any items we already had. We dare
* not try to free them if they're now unreferenced, since
* an error while doing that would result in the PG_CATCH
* below doing extra refcount decrements. Besides, we'll
* likely re-adopt those items in the next iteration, so
* it's not worth complicating matters to try to get rid
* of them.
*/
foreach(ctlist_item, ctlist)
{
ct = (CatCTup *) lfirst(ctlist_item);
Assert(ct->c_list == NULL);
Assert(ct->refcount > 0);
ct->refcount--;
}
/* Reset ctlist in preparation for new try */
ctlist = NIL;
stale = true;
break;
}
}
/* Careful here: add entry to ctlist, then bump its refcount */
@ -1749,6 +1799,7 @@ SearchCatCacheList(CatCache *cache,
}
systable_endscan(scandesc);
} while (stale);
table_close(relation, AccessShareLock);
@ -1865,22 +1916,42 @@ ReleaseCatCacheListWithOwner(CatCList *list, ResourceOwner resowner)
* CatalogCacheCreateEntry
* Create a new CatCTup entry, copying the given HeapTuple and other
* supplied data into it. The new entry initially has refcount 0.
*
* To create a normal cache entry, ntp must be the HeapTuple just fetched
* from scandesc, and "arguments" is not used. To create a negative cache
* entry, pass NULL for ntp and scandesc; then "arguments" is the cache
* keys to use. In either case, hashValue/hashIndex are the hash values
* computed from the cache keys.
*
* Returns NULL if we attempt to detoast the tuple and observe that it
* became stale. (This cannot happen for a negative entry.) Caller must
* retry the tuple lookup in that case.
*/
static CatCTup *
CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
uint32 hashValue, Index hashIndex,
bool negative)
CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
Datum *arguments,
uint32 hashValue, Index hashIndex)
{
CatCTup *ct;
HeapTuple dtp;
MemoryContext oldcxt;
/* negative entries have no tuple associated */
if (ntp)
{
int i;
Assert(!negative);
/*
* The visibility recheck below essentially never fails during our
* regression tests, and there's no easy way to force it to fail for
* testing purposes. To ensure we have test coverage for the retry
* paths in our callers, make debug builds randomly fail about 0.1% of
* the times through this code path, even when there's no toasted
* fields.
*/
#ifdef USE_ASSERT_CHECKING
if (pg_prng_uint32(&pg_global_prng_state) <= (PG_UINT32_MAX / 1000))
return NULL;
#endif
/*
* If there are any out-of-line toasted fields in the tuple, expand
@ -1890,7 +1961,20 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
* something using a slightly stale catcache entry.
*/
if (HeapTupleHasExternal(ntp))
{
dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
/*
* The tuple could become stale while we are doing toast table
* access (since AcceptInvalidationMessages can run then), so we
* must recheck its visibility afterwards.
*/
if (!systable_recheck_tuple(scandesc, ntp))
{
heap_freetuple(dtp);
return NULL;
}
}
else
dtp = ntp;
@ -1929,7 +2013,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
}
else
{
Assert(negative);
/* Set up keys for a negative cache entry */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
ct = (CatCTup *) palloc(sizeof(CatCTup));
@ -1951,7 +2035,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
ct->c_list = NULL;
ct->refcount = 0; /* for the moment */
ct->dead = false;
ct->negative = negative;
ct->negative = (ntp == NULL);
ct->hash_value = hashValue;
dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem);