diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 9d21ee6b07..f0c2612087 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -515,10 +515,12 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) (void) GetCurrentCommandId(true); /* - * If the relation being invalidated is one of those cached in the local - * relcache init file, mark that we need to zap that file at commit. + * If the relation being invalidated is one of those cached in a relcache + * init file, mark that we need to zap that file at commit. For simplicity + * invalidations for a specific database always invalidate the shared file + * as well. */ - if (OidIsValid(dbId) && RelationIdIsInInitFile(relId)) + if (RelationIdIsInInitFile(relId)) transInvalInfo->RelcacheInitFileInval = true; } @@ -870,18 +872,26 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, if (RelcacheInitFileInval) { + elog(trace_recovery(DEBUG4), "removing relcache init files for database %u", + dbid); + /* - * RelationCacheInitFilePreInvalidate requires DatabasePath to be set, - * but we should not use SetDatabasePath during recovery, since it is + * RelationCacheInitFilePreInvalidate, when the invalidation message + * is for a specific database, requires DatabasePath to be set, but we + * should not use SetDatabasePath during recovery, since it is * intended to be used only once by normal backends. Hence, a quick * hack: set DatabasePath directly then unset after use. */ - DatabasePath = GetDatabasePath(dbid, tsid); - elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"", - DatabasePath); + if (OidIsValid(dbid)) + DatabasePath = GetDatabasePath(dbid, tsid); + RelationCacheInitFilePreInvalidate(); - pfree(DatabasePath); - DatabasePath = NULL; + + if (OidIsValid(dbid)) + { + pfree(DatabasePath); + DatabasePath = NULL; + } } SendSharedInvalidMessages(msgs, nmsgs); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d5dc505720..0853a0c913 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -241,6 +241,7 @@ static void RelationDestroyRelation(Relation relation, bool remember_tupdesc); static void RelationClearRelation(Relation relation, bool rebuild); static void RelationReloadIndexInfo(Relation relation); +static void RelationReloadNailed(Relation relation); static void RelationFlushRelation(Relation relation); static void RememberToFreeTupleDescAtEOX(TupleDesc td); static void AtEOXact_cleanup(Relation relation, bool isCommit); @@ -277,7 +278,7 @@ static void IndexSupportInitialize(oidvector *indclass, static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid, StrategyNumber numSupport); static void RelationCacheInitFileRemoveInDir(const char *tblspcpath); -static void unlink_initfile(const char *initfilename); +static void unlink_initfile(const char *initfilename, int elevel); /* @@ -1782,7 +1783,16 @@ RelationIdGetRelation(Oid relationId) RelationReloadIndexInfo(rd); else RelationClearRelation(rd, true); - Assert(rd->rd_isvalid); + + /* + * Normally entries need to be valid here, but before the relcache + * has been initialized, not enough infrastructure exists to + * perform pg_class lookups. The structure of such entries doesn't + * change, but we still want to update the rd_rel entry. So + * rd_isvalid = false is left in place for a later lookup. + */ + Assert(rd->rd_isvalid || + (rd->rd_isnailed && !criticalRelcachesBuilt)); } return rd; } @@ -1985,6 +1995,81 @@ RelationReloadIndexInfo(Relation relation) relation->rd_isvalid = true; } +/* + * RelationReloadNailed - reload minimal information for nailed relations. + * + * The structure of a nailed relation can never change (which is good, because + * we rely on knowing their structure to be able to read catalog content). But + * some parts, e.g. pg_class.relfrozenxid, are still important to have + * accurate content for. Therefore those need to be reloaded after the arrival + * of invalidations. + */ +static void +RelationReloadNailed(Relation relation) +{ + Assert(relation->rd_isnailed); + + /* + * Redo RelationInitPhysicalAddr in case it is a mapped relation whose + * mapping changed. + */ + RelationInitPhysicalAddr(relation); + + /* flag as needing to be revalidated */ + relation->rd_isvalid = false; + + /* + * Can only reread catalog contents if in a transaction. If the relation + * is currently open (not counting the nailed refcount), do so + * immediately. Otherwise we've already marked the entry as possibly + * invalid, and it'll be fixed when next opened. + */ + if (!IsTransactionState() || relation->rd_refcnt <= 1) + return; + + if (relation->rd_rel->relkind == RELKIND_INDEX) + { + /* + * If it's a nailed-but-not-mapped index, then we need to re-read the + * pg_class row to see if its relfilenode changed. + */ + RelationReloadIndexInfo(relation); + } + else + { + /* + * Reload a non-index entry. We can't easily do so if relcaches + * aren't yet built, but that's fine because at that stage the + * attributes that need to be current (like relfrozenxid) aren't yet + * accessed. To ensure the entry will later be revalidated, we leave + * it in invalid state, but allow use (cf. RelationIdGetRelation()). + */ + if (criticalRelcachesBuilt) + { + HeapTuple pg_class_tuple; + Form_pg_class relp; + + /* + * NB: Mark the entry as valid before starting to scan, to avoid + * self-recursion when re-building pg_class. + */ + relation->rd_isvalid = true; + + pg_class_tuple = ScanPgRelation(RelationGetRelid(relation), + true, false); + relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); + memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE); + heap_freetuple(pg_class_tuple); + + /* + * Again mark as valid, to protect against concurrently arriving + * invalidations. + */ + relation->rd_isvalid = true; + } + } +} + /* * RelationDestroyRelation * @@ -2089,26 +2174,12 @@ RelationClearRelation(Relation relation, bool rebuild) RelationCloseSmgr(relation); /* - * Never, never ever blow away a nailed-in system relation, because we'd - * be unable to recover. However, we must redo RelationInitPhysicalAddr - * in case it is a mapped relation whose mapping changed. - * - * If it's a nailed-but-not-mapped index, then we need to re-read the - * pg_class row to see if its relfilenode changed. We do that immediately - * if we're inside a valid transaction and the relation is open (not - * counting the nailed refcount). Otherwise just mark the entry as - * possibly invalid, and it'll be fixed when next opened. + * Treat nailed-in system relations separately, they always need to be + * accessible, so we can't blow them away. */ if (relation->rd_isnailed) { - RelationInitPhysicalAddr(relation); - - if (relation->rd_rel->relkind == RELKIND_INDEX) - { - relation->rd_isvalid = false; /* needs to be revalidated */ - if (relation->rd_refcnt > 1 && IsTransactionState()) - RelationReloadIndexInfo(relation); - } + RelationReloadNailed(relation); return; } @@ -5380,24 +5451,26 @@ write_item(const void *data, Size len, FILE *fp) /* * Determine whether a given relation (identified by OID) is one of the ones - * we should store in the local relcache init file. + * we should store in a relcache init file. * * We must cache all nailed rels, and for efficiency we should cache every rel * that supports a syscache. The former set is almost but not quite a subset - * of the latter. Currently, we must special-case TriggerRelidNameIndexId, - * which RelationCacheInitializePhase3 chooses to nail for efficiency reasons, - * but which does not support any syscache. - * - * Note: this function is currently never called for shared rels. If it were, - * we'd probably also need a special case for DatabaseNameIndexId, which is - * critical but does not support a syscache. + * of the latter. The special cases are relations where + * RelationCacheInitializePhase2/3 chooses to nail for efficiency reasons, but + * which do not support any syscache. */ bool RelationIdIsInInitFile(Oid relationId) { - if (relationId == TriggerRelidNameIndexId) + if (relationId == SharedSecLabelRelationId || + relationId == TriggerRelidNameIndexId || + relationId == DatabaseNameIndexId || + relationId == SharedSecLabelObjectIndexId) { - /* If this Assert fails, we don't need this special case anymore. */ + /* + * If this Assert fails, we don't need the applicable special case + * anymore. + */ Assert(!RelationSupportsSysCache(relationId)); return true; } @@ -5471,38 +5544,30 @@ RelationHasUnloggedIndex(Relation rel) * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate, * then release the lock in RelationCacheInitFilePostInvalidate. Caller must * send any pending SI messages between those calls. - * - * Notice this deals only with the local init file, not the shared init file. - * The reason is that there can never be a "significant" change to the - * relcache entry of a shared relation; the most that could happen is - * updates of noncritical fields such as relpages/reltuples. So, while - * it's worth updating the shared init file from time to time, it can never - * be invalid enough to make it necessary to remove it. */ void RelationCacheInitFilePreInvalidate(void) { - char initfilename[MAXPGPATH]; + char localinitfname[MAXPGPATH]; + char sharedinitfname[MAXPGPATH]; - snprintf(initfilename, sizeof(initfilename), "%s/%s", - DatabasePath, RELCACHE_INIT_FILENAME); + if (DatabasePath) + snprintf(localinitfname, sizeof(localinitfname), "%s/%s", + DatabasePath, RELCACHE_INIT_FILENAME); + snprintf(sharedinitfname, sizeof(sharedinitfname), "global/%s", + RELCACHE_INIT_FILENAME); LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); - if (unlink(initfilename) < 0) - { - /* - * The file might not be there if no backend has been started since - * the last removal. But complain about failures other than ENOENT. - * Fortunately, it's not too late to abort the transaction if we can't - * get rid of the would-be-obsolete init file. - */ - if (errno != ENOENT) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove cache file \"%s\": %m", - initfilename))); - } + /* + * The files might not be there if no backend has been started since the + * last removal. But complain about failures other than ENOENT with + * ERROR. Fortunately, it's not too late to abort the transaction if we + * can't get rid of the would-be-obsolete init file. + */ + if (DatabasePath) + unlink_initfile(localinitfname, ERROR); + unlink_initfile(sharedinitfname, ERROR); } void @@ -5528,13 +5593,9 @@ RelationCacheInitFileRemove(void) struct dirent *de; char path[MAXPGPATH + 10 + sizeof(TABLESPACE_VERSION_DIRECTORY)]; - /* - * We zap the shared cache file too. In theory it can't get out of sync - * enough to be a problem, but in data-corruption cases, who knows ... - */ snprintf(path, sizeof(path), "global/%s", RELCACHE_INIT_FILENAME); - unlink_initfile(path); + unlink_initfile(path, LOG); /* Scan everything in the default tablespace */ RelationCacheInitFileRemoveInDir("base"); @@ -5586,7 +5647,7 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath) /* Try to remove the init file in each database */ snprintf(initfilename, sizeof(initfilename), "%s/%s/%s", tblspcpath, de->d_name, RELCACHE_INIT_FILENAME); - unlink_initfile(initfilename); + unlink_initfile(initfilename, LOG); } } @@ -5594,12 +5655,15 @@ RelationCacheInitFileRemoveInDir(const char *tblspcpath) } static void -unlink_initfile(const char *initfilename) +unlink_initfile(const char *initfilename, int elevel) { if (unlink(initfilename) < 0) { /* It might not be there, but log any error other than ENOENT */ if (errno != ENOENT) - elog(LOG, "could not remove cache file \"%s\": %m", initfilename); + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not remove cache file \"%s\": %m", + initfilename))); } } diff --git a/src/include/storage/standbydefs.h b/src/include/storage/standbydefs.h index ea22d77e07..01eb01dfa1 100644 --- a/src/include/storage/standbydefs.h +++ b/src/include/storage/standbydefs.h @@ -64,7 +64,7 @@ typedef struct xl_invalidations { Oid dbId; /* MyDatabaseId */ Oid tsId; /* MyDatabaseTableSpace */ - bool relcacheInitFileInval; /* invalidate relcache init file */ + bool relcacheInitFileInval; /* invalidate relcache init files */ int nmsgs; /* number of shared inval msgs */ SharedInvalidationMessage msgs[FLEXIBLE_ARRAY_MEMBER]; } xl_invalidations;