From 473182c9523afad10e9507145690d902a0bc7f04 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 22 Mar 2024 17:13:53 -0400 Subject: [PATCH] Use a hash table for catcache.c's CatCList objects. Up to now, all of the "catcache list" objects within a catalog cache were just chained together on a single dlist, requiring O(N) time to search. Remarkably, we've not had serious performance problems with that so far; but we got a complaint of a bad performance regression from v15 in a case with a large number of roles in the system, which traced down to O(N^2) total time when we probed N catcache lists. Replace that data structure with a hashtable having an enlargeable number of dlists, in an exactly parallel way to the data structure we've used for years for the plain CatCTup cache members. The extra cost of maintaining a hash table seems negligible, since we were already computing a hash value for list searches. Normally this'd be HEAD-only material, but in view of the performance regression it seems advisable to back-patch into v16. In the v16 version of the patch, leave the dead cc_lists field where it is and add the new fields at the end of struct catcache, to avoid possible ABI breakage in case any external code is looking at these structs. (We assume no external code is actually allocating new catcache structs.) Per report from alex work. Discussion: https://postgr.es/m/CAGvXd3OSMbJQwOSc-Tq-Ro1CAz=vggErdSG7pv2s6vmmTOLJSg@mail.gmail.com --- src/backend/utils/cache/catcache.c | 143 ++++++++++++++++++++++++----- src/include/utils/catcache.h | 6 +- 2 files changed, 124 insertions(+), 25 deletions(-) diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index d5a3c1b591..569f51cb33 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -88,6 +88,8 @@ static void CatCachePrintStats(int code, Datum arg); #endif static void CatCacheRemoveCTup(CatCache *cache, CatCTup *ct); static void CatCacheRemoveCList(CatCache *cache, CatCList *cl); +static void RehashCatCache(CatCache *cp); +static void RehashCatCacheLists(CatCache *cp); static void CatalogCacheInitializeCache(CatCache *cache); static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc, @@ -444,6 +446,7 @@ CatCachePrintStats(int code, Datum arg) long cc_neg_hits = 0; long cc_newloads = 0; long cc_invals = 0; + long cc_nlists = 0; long cc_lsearches = 0; long cc_lhits = 0; @@ -453,7 +456,7 @@ CatCachePrintStats(int code, Datum arg) if (cache->cc_ntup == 0 && cache->cc_searches == 0) continue; /* don't print unused caches */ - elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits", + elog(DEBUG2, "catcache %s/%u: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %d lists, %ld lsrch, %ld lhits", cache->cc_relname, cache->cc_indexoid, cache->cc_ntup, @@ -465,6 +468,7 @@ CatCachePrintStats(int code, Datum arg) cache->cc_searches - cache->cc_hits - cache->cc_neg_hits - cache->cc_newloads, cache->cc_searches - cache->cc_hits - cache->cc_neg_hits, cache->cc_invals, + cache->cc_nlist, cache->cc_lsearches, cache->cc_lhits); cc_searches += cache->cc_searches; @@ -472,10 +476,11 @@ CatCachePrintStats(int code, Datum arg) cc_neg_hits += cache->cc_neg_hits; cc_newloads += cache->cc_newloads; cc_invals += cache->cc_invals; + cc_nlists += cache->cc_nlist; cc_lsearches += cache->cc_lsearches; cc_lhits += cache->cc_lhits; } - elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lsrch, %ld lhits", + elog(DEBUG2, "catcache totals: %d tup, %ld srch, %ld+%ld=%ld hits, %ld+%ld=%ld loads, %ld invals, %ld lists, %ld lsrch, %ld lhits", CacheHdr->ch_ntup, cc_searches, cc_hits, @@ -485,6 +490,7 @@ CatCachePrintStats(int code, Datum arg) cc_searches - cc_hits - cc_neg_hits - cc_newloads, cc_searches - cc_hits - cc_neg_hits, cc_invals, + cc_nlists, cc_lsearches, cc_lhits); } @@ -573,6 +579,8 @@ CatCacheRemoveCList(CatCache *cache, CatCList *cl) cache->cc_keyno, cl->keys); pfree(cl); + + --cache->cc_nlist; } @@ -611,14 +619,19 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue) * Invalidate *all* CatCLists in this cache; it's too hard to tell which * searches might still be correct, so just zap 'em all. */ - dlist_foreach_modify(iter, &cache->cc_lists) + for (int i = 0; i < cache->cc_nlbuckets; i++) { - CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + dlist_head *bucket = &cache->cc_lbucket[i]; - if (cl->refcount > 0) - cl->dead = true; - else - CatCacheRemoveCList(cache, cl); + dlist_foreach_modify(iter, bucket) + { + CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + + if (cl->refcount > 0) + cl->dead = true; + else + CatCacheRemoveCList(cache, cl); + } } /* @@ -691,14 +704,19 @@ ResetCatalogCache(CatCache *cache) int i; /* Remove each list in this cache, or at least mark it dead */ - dlist_foreach_modify(iter, &cache->cc_lists) + for (i = 0; i < cache->cc_nlbuckets; i++) { - CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + dlist_head *bucket = &cache->cc_lbucket[i]; - if (cl->refcount > 0) - cl->dead = true; - else - CatCacheRemoveCList(cache, cl); + dlist_foreach_modify(iter, bucket) + { + CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + + if (cl->refcount > 0) + cl->dead = true; + else + CatCacheRemoveCList(cache, cl); + } } /* Remove each tuple in this cache, or at least mark it dead */ @@ -862,6 +880,12 @@ InitCatCache(int id, MCXT_ALLOC_ZERO); cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head)); + /* + * Many catcaches never receive any list searches. Therefore, we don't + * allocate the cc_lbuckets till we get a list search. + */ + cp->cc_lbucket = NULL; + /* * initialize the cache's relation information for the relation * corresponding to this cache, and initialize some of the new cache's @@ -874,7 +898,9 @@ InitCatCache(int id, cp->cc_relisshared = false; /* temporary */ cp->cc_tupdesc = (TupleDesc) NULL; cp->cc_ntup = 0; + cp->cc_nlist = 0; cp->cc_nbuckets = nbuckets; + cp->cc_nlbuckets = 0; cp->cc_nkeys = nkeys; for (i = 0; i < nkeys; ++i) { @@ -939,6 +965,44 @@ RehashCatCache(CatCache *cp) cp->cc_bucket = newbucket; } +/* + * Enlarge a catcache's list storage, doubling the number of buckets. + */ +static void +RehashCatCacheLists(CatCache *cp) +{ + dlist_head *newbucket; + int newnbuckets; + int i; + + elog(DEBUG1, "rehashing catalog cache id %d for %s; %d lists, %d buckets", + cp->id, cp->cc_relname, cp->cc_nlist, cp->cc_nlbuckets); + + /* Allocate a new, larger, hash table. */ + newnbuckets = cp->cc_nlbuckets * 2; + newbucket = (dlist_head *) MemoryContextAllocZero(CacheMemoryContext, newnbuckets * sizeof(dlist_head)); + + /* Move all entries from old hash table to new. */ + for (i = 0; i < cp->cc_nlbuckets; i++) + { + dlist_mutable_iter iter; + + dlist_foreach_modify(iter, &cp->cc_lbucket[i]) + { + CatCList *cl = dlist_container(CatCList, cache_elem, iter.cur); + int hashIndex = HASH_INDEX(cl->hash_value, newnbuckets); + + dlist_delete(iter.cur); + dlist_push_head(&newbucket[hashIndex], &cl->cache_elem); + } + } + + /* Switch to the new array. */ + pfree(cp->cc_lbucket); + cp->cc_nlbuckets = newnbuckets; + cp->cc_lbucket = newbucket; +} + /* * CatalogCacheInitializeCache * @@ -1588,7 +1652,9 @@ SearchCatCacheList(CatCache *cache, Datum v4 = 0; /* dummy last-column value */ Datum arguments[CATCACHE_MAXKEYS]; uint32 lHashValue; + Index lHashIndex; dlist_iter iter; + dlist_head *lbucket; CatCList *cl; CatCTup *ct; List *volatile ctlist; @@ -1602,7 +1668,7 @@ SearchCatCacheList(CatCache *cache, /* * one-time startup overhead for each cache */ - if (cache->cc_tupdesc == NULL) + if (unlikely(cache->cc_tupdesc == NULL)) CatalogCacheInitializeCache(cache); Assert(nkeys > 0 && nkeys < cache->cc_nkeys); @@ -1618,11 +1684,36 @@ SearchCatCacheList(CatCache *cache, arguments[3] = v4; /* - * compute a hash value of the given keys for faster search. We don't - * presently divide the CatCList items into buckets, but this still lets - * us skip non-matching items quickly most of the time. + * If we haven't previously done a list search in this cache, create the + * bucket header array; otherwise, consider whether it's time to enlarge + * it. + */ + if (cache->cc_lbucket == NULL) + { + /* Arbitrary initial size --- must be a power of 2 */ + int nbuckets = 16; + + cache->cc_lbucket = (dlist_head *) + MemoryContextAllocZero(CacheMemoryContext, + nbuckets * sizeof(dlist_head)); + /* Don't set cc_nlbuckets if we get OOM allocating cc_lbucket */ + cache->cc_nlbuckets = nbuckets; + } + else + { + /* + * If the hash table has become too full, enlarge the buckets array. + * Quite arbitrarily, we enlarge when fill factor > 2. + */ + if (cache->cc_nlist > cache->cc_nlbuckets * 2) + RehashCatCacheLists(cache); + } + + /* + * Find the hash bucket in which to look for the CatCList. */ lHashValue = CatalogCacheComputeHashValue(cache, nkeys, v1, v2, v3, v4); + lHashIndex = HASH_INDEX(lHashValue, cache->cc_nlbuckets); /* * scan the items until we find a match or exhaust our list @@ -1630,7 +1721,8 @@ SearchCatCacheList(CatCache *cache, * Note: it's okay to use dlist_foreach here, even though we modify the * dlist within the loop, because we don't continue the loop afterwards. */ - dlist_foreach(iter, &cache->cc_lists) + lbucket = &cache->cc_lbucket[lHashIndex]; + dlist_foreach(iter, lbucket) { cl = dlist_container(CatCList, cache_elem, iter.cur); @@ -1650,13 +1742,13 @@ SearchCatCacheList(CatCache *cache, continue; /* - * We found a matching list. Move the list to the front of the - * cache's list-of-lists, to speed subsequent searches. (We do not + * We found a matching list. Move the list to the front of the list + * for its hashbucket, so as to speed subsequent searches. (We do not * move the members to the fronts of their hashbucket lists, however, * since there's no point in that unless they are searched for * individually.) */ - dlist_move_head(&cache->cc_lists, &cl->cache_elem); + dlist_move_head(lbucket, &cl->cache_elem); /* Bump the list's refcount and return it */ ResourceOwnerEnlarge(CurrentResourceOwner); @@ -1868,7 +1960,12 @@ SearchCatCacheList(CatCache *cache, } Assert(i == nmembers); - dlist_push_head(&cache->cc_lists, &cl->cache_elem); + /* + * Add the CatCList to the appropriate bucket, and count it. + */ + dlist_push_head(lbucket, &cl->cache_elem); + + cache->cc_nlist++; /* Finally, bump the list's refcount and return it */ cl->refcount++; diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index a75a528de8..3fb9647b87 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -51,9 +51,11 @@ typedef struct catcache CCFastEqualFN cc_fastequal[CATCACHE_MAXKEYS]; /* fast equal function for * each key */ int cc_keyno[CATCACHE_MAXKEYS]; /* AttrNumber of each key */ - dlist_head cc_lists; /* list of CatCList structs */ - int cc_ntup; /* # of tuples currently in this cache */ int cc_nkeys; /* # of keys (1..CATCACHE_MAXKEYS) */ + int cc_ntup; /* # of tuples currently in this cache */ + int cc_nlist; /* # of CatCLists currently in this cache */ + int cc_nlbuckets; /* # of CatCList hash buckets in this cache */ + dlist_head *cc_lbucket; /* hash buckets for CatCLists */ const char *cc_relname; /* name of relation the tuples come from */ Oid cc_reloid; /* OID of relation the tuples come from */ Oid cc_indexoid; /* OID of index matching cache keys */