1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* syscache.c
|
1996-07-09 08:22:35 +02:00
|
|
|
* System cache management routines
|
|
|
|
*
|
2024-01-04 02:49:05 +01:00
|
|
|
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/utils/cache/syscache.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* NOTES
|
|
|
|
* These routines allow the parser/planner/executor to perform
|
|
|
|
* rapid lookups on the contents of the system catalogs.
|
|
|
|
*
|
2011-06-18 23:37:30 +02:00
|
|
|
* see utils/syscache.h for a list of the cache IDs
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1996-11-03 07:54:38 +01:00
|
|
|
#include "postgres.h"
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2012-08-30 22:15:44 +02:00
|
|
|
#include "access/htup_details.h"
|
2024-01-23 07:13:38 +01:00
|
|
|
#include "catalog/pg_db_role_setting_d.h"
|
|
|
|
#include "catalog/pg_depend_d.h"
|
|
|
|
#include "catalog/pg_description_d.h"
|
|
|
|
#include "catalog/pg_seclabel_d.h"
|
|
|
|
#include "catalog/pg_shdepend_d.h"
|
|
|
|
#include "catalog/pg_shdescription_d.h"
|
|
|
|
#include "catalog/pg_shseclabel_d.h"
|
2024-02-16 21:05:36 +01:00
|
|
|
#include "common/int.h"
|
2019-11-07 04:51:04 +01:00
|
|
|
#include "lib/qunique.h"
|
2012-08-29 00:26:24 +02:00
|
|
|
#include "utils/catcache.h"
|
2023-03-25 22:49:33 +01:00
|
|
|
#include "utils/lsyscache.h"
|
2019-11-12 04:00:16 +01:00
|
|
|
#include "utils/rel.h"
|
2000-06-17 06:56:39 +02:00
|
|
|
#include "utils/syscache.h"
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1999-11-22 18:56:41 +01:00
|
|
|
/*---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
Adding system caches:
|
|
|
|
|
2002-08-05 05:29:17 +02:00
|
|
|
There must be a unique index underlying each syscache (ie, an index
|
|
|
|
whose key is the same as that of the cache). If there is not one
|
2021-06-29 07:57:16 +02:00
|
|
|
already, add the definition for it to include/catalog/pg_*.h using
|
|
|
|
DECLARE_UNIQUE_INDEX.
|
2005-04-14 22:03:27 +02:00
|
|
|
(Adding an index requires a catversion.h update, while simply
|
|
|
|
adding/deleting caches only requires a recompile.)
|
1999-11-22 18:56:41 +01:00
|
|
|
|
2024-01-23 07:13:38 +01:00
|
|
|
Add a MAKE_SYSCACHE call to the same pg_*.h file specifying the name of
|
|
|
|
your cache, the underlying index, and the initial number of hash buckets.
|
|
|
|
|
|
|
|
The number of hash buckets must be a power of 2. It's reasonable to
|
|
|
|
set this to the number of entries that might be in the particular cache
|
|
|
|
in a medium-size database.
|
|
|
|
|
1999-11-22 18:56:41 +01:00
|
|
|
Finally, any place your relation gets heap_insert() or
|
2017-03-02 12:33:50 +01:00
|
|
|
heap_update() calls, use CatalogTupleInsert() or CatalogTupleUpdate()
|
|
|
|
instead, which also update indexes. The heap_* calls do not do that.
|
1999-11-22 18:56:41 +01:00
|
|
|
|
2005-04-14 22:03:27 +02:00
|
|
|
*---------------------------------------------------------------------------
|
1999-11-22 18:56:41 +01:00
|
|
|
*/
|
|
|
|
|
2001-02-22 19:39:20 +01:00
|
|
|
/*
 * struct cachedesc: information defining a single syscache.
 *
 * One of these exists for each syscache; the cacheinfo[] array of them is
 * generated into catalog/syscache_info.h (see the MAKE_SYSCACHE calls in the
 * catalog pg_*.h headers).  Field names must not change, since that header
 * initializes them by name.
 */
struct cachedesc
{
	Oid			reloid;			/* OID of the relation being cached */
	Oid			indoid;			/* OID of index relation for this cache */
	int			nkeys;			/* # of keys needed for cache lookup */
	int			key[4];			/* attribute numbers of key attrs */
	int			nbuckets;		/* number of hash buckets for this cache;
								 * must be a power of 2 */
};
|
|
|
|
|
2022-12-22 22:40:18 +01:00
|
|
|
/*
 * Macro to provide nkeys and key array with convenient syntax.  Expands to
 * the number of arguments followed by a brace-enclosed list of them, so it
 * initializes both the nkeys and key[] members of struct cachedesc at once.
 */
#define KEY(...) VA_ARGS_NARGS(__VA_ARGS__), { __VA_ARGS__ }
|
|
|
|
|
2024-01-23 07:13:38 +01:00
|
|
|
/*
 * Pull in the generated cacheinfo[] array: one struct cachedesc entry per
 * syscache, built from the MAKE_SYSCACHE declarations in the catalog
 * headers.
 */
#include "catalog/syscache_info.h"

/* Cross-check that syscache.h's SysCacheSize agrees with cacheinfo[] */
StaticAssertDecl(lengthof(cacheinfo) == SysCacheSize,
				 "SysCacheSize does not match syscache.c's array");
|
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/* The active catcaches, indexed by cache ID; filled by InitCatalogCache() */
static CatCache *SysCache[SysCacheSize];
|
|
|
|
|
2000-02-18 10:30:20 +01:00
|
|
|
/* Guards against InitCatalogCache() being run twice (see its Assert) */
static bool CacheInitialized = false;
|
2000-06-17 06:56:39 +02:00
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/*
 * Sorted array of OIDs of tables that have caches on them.
 * Accumulated (in cache-ID order) by InitCatalogCache(); presumably sorted
 * and de-duplicated afterward -- the sorting step is outside this excerpt.
 */
static Oid	SysCacheRelationOid[SysCacheSize];
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/* Number of valid entries in SysCacheRelationOid[] */
static int	SysCacheRelationOidSize;
|
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/*
 * Sorted array of OIDs of tables and indexes used by caches.  Two slots per
 * cache (reloid + indoid); filled by InitCatalogCache().  Used to decide
 * whether a relation belongs in the relcache init file.
 */
static Oid	SysCacheSupportingRelOid[SysCacheSize * 2];

/* Number of valid entries in SysCacheSupportingRelOid[] */
static int	SysCacheSupportingRelOidSize;
|
|
|
|
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/* qsort()/bsearch()-style comparator for Oid values */
static int	oid_compare(const void *a, const void *b);
|
2000-06-17 06:56:39 +02:00
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2000-11-16 23:30:52 +01:00
|
|
|
* InitCatalogCache - initialize the caches
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* Note that no database access is done here; we only allocate memory
|
|
|
|
* and initialize the cache structure. Interrogation of the database
|
2002-02-19 21:11:20 +01:00
|
|
|
* to complete initialization of a cache happens upon first use
|
2000-11-16 23:30:52 +01:00
|
|
|
* of that cache.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
void
|
2000-11-16 23:30:52 +01:00
|
|
|
InitCatalogCache(void)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2000-11-16 23:30:52 +01:00
|
|
|
int cacheId;
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
Assert(!CacheInitialized);
|
2000-06-17 06:56:39 +02:00
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
SysCacheRelationOidSize = SysCacheSupportingRelOidSize = 0;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2022-12-22 22:40:18 +01:00
|
|
|
/*
|
|
|
|
* Assert that every enumeration value defined in syscache.h has been
|
|
|
|
* populated in the cacheinfo array.
|
|
|
|
*/
|
2023-07-27 03:55:16 +02:00
|
|
|
Assert(OidIsValid(cacheinfo[cacheId].reloid));
|
|
|
|
Assert(OidIsValid(cacheinfo[cacheId].indoid));
|
|
|
|
/* .nbuckets and .key[] are checked by InitCatCache() */
|
2022-12-22 22:40:18 +01:00
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
SysCache[cacheId] = InitCatCache(cacheId,
|
2005-04-14 22:03:27 +02:00
|
|
|
cacheinfo[cacheId].reloid,
|
|
|
|
cacheinfo[cacheId].indoid,
|
2000-11-16 23:30:52 +01:00
|
|
|
cacheinfo[cacheId].nkeys,
|
2006-06-15 04:08:09 +02:00
|
|
|
cacheinfo[cacheId].key,
|
|
|
|
cacheinfo[cacheId].nbuckets);
|
2000-11-16 23:30:52 +01:00
|
|
|
if (!PointerIsValid(SysCache[cacheId]))
|
2005-04-14 22:03:27 +02:00
|
|
|
elog(ERROR, "could not initialize cache %u (%d)",
|
|
|
|
cacheinfo[cacheId].reloid, cacheId);
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/* Accumulate data for OID lists, too */
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
SysCacheRelationOid[SysCacheRelationOidSize++] =
|
|
|
|
cacheinfo[cacheId].reloid;
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
|
|
|
|
cacheinfo[cacheId].reloid;
|
|
|
|
SysCacheSupportingRelOid[SysCacheSupportingRelOidSize++] =
|
|
|
|
cacheinfo[cacheId].indoid;
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/* see comments for RelationInvalidatesSnapshotsOnly */
|
|
|
|
Assert(!RelationInvalidatesSnapshotsOnly(cacheinfo[cacheId].reloid));
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
Assert(SysCacheRelationOidSize <= lengthof(SysCacheRelationOid));
|
|
|
|
Assert(SysCacheSupportingRelOidSize <= lengthof(SysCacheSupportingRelOid));
|
|
|
|
|
|
|
|
/* Sort and de-dup OID arrays, so we can use binary search. */
|
2024-02-16 18:37:50 +01:00
|
|
|
qsort(SysCacheRelationOid, SysCacheRelationOidSize,
|
|
|
|
sizeof(Oid), oid_compare);
|
2019-11-07 04:51:04 +01:00
|
|
|
SysCacheRelationOidSize =
|
|
|
|
qunique(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid),
|
|
|
|
oid_compare);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue that has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
2024-02-16 18:37:50 +01:00
|
|
|
qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
|
|
|
|
sizeof(Oid), oid_compare);
|
2019-11-07 04:51:04 +01:00
|
|
|
SysCacheSupportingRelOidSize =
|
|
|
|
qunique(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
|
|
|
|
sizeof(Oid), oid_compare);
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
|
2000-02-18 10:30:20 +01:00
|
|
|
CacheInitialized = true;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2002-02-19 21:11:20 +01:00
|
|
|
/*
|
|
|
|
* InitCatalogCachePhase2 - finish initializing the caches
|
|
|
|
*
|
|
|
|
* Finish initializing all the caches, including necessary database
|
|
|
|
* access.
|
|
|
|
*
|
|
|
|
* This is *not* essential; normally we allow syscaches to be initialized
|
|
|
|
* on first use. However, it is useful as a mechanism to preload the
|
|
|
|
* relcache with entries for the most-commonly-used system catalogs.
|
|
|
|
* Therefore, we invoke this routine when we need to write a new relcache
|
|
|
|
* init file.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
InitCatalogCachePhase2(void)
|
|
|
|
{
|
|
|
|
int cacheId;
|
|
|
|
|
|
|
|
Assert(CacheInitialized);
|
|
|
|
|
|
|
|
for (cacheId = 0; cacheId < SysCacheSize; cacheId++)
|
2006-10-06 20:23:35 +02:00
|
|
|
InitCatCachePhase2(SysCache[cacheId], true);
|
2002-02-19 21:11:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2000-11-16 23:30:52 +01:00
|
|
|
* SearchSysCache
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* A layer on top of SearchCatCache that does the initialization and
|
1998-08-19 04:04:17 +02:00
|
|
|
* key-setting for you.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
1998-08-19 04:04:17 +02:00
|
|
|
* Returns the cache copy of the tuple if one is found, NULL if not.
|
2000-11-16 23:30:52 +01:00
|
|
|
* The tuple is the 'cache' copy and must NOT be modified!
|
2000-06-06 19:02:38 +02:00
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* When the caller is done using the tuple, call ReleaseSysCache()
|
|
|
|
* to release the reference count grabbed by SearchSysCache(). If this
|
|
|
|
* is not done, the tuple will remain locked in cache until end of
|
|
|
|
* transaction, which is tolerable but not desirable.
|
2000-06-17 06:56:39 +02:00
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* CAUTION: The tuple that is returned must NOT be freed by the caller!
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
|
|
|
HeapTuple
|
2000-11-16 23:30:52 +01:00
|
|
|
SearchSysCache(int cacheId,
|
|
|
|
Datum key1,
|
|
|
|
Datum key2,
|
|
|
|
Datum key3,
|
|
|
|
Datum key4)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
Improve sys/catcache performance.
The following are the individual improvements:
1) Avoidance of FunctionCallInfo based function calls, replaced by
more efficient functions with a native C argument interface.
2) Don't extract columns from a cache entry's tuple whenever matching
entries - instead store them as a Datum array. This also allows to
get rid of having to build dummy tuples for negative & list
entries, and of a hack for dealing with cstring vs. text weirdness.
3) Reorder members of catcache.h struct, so imortant entries are more
likely to be on one cacheline.
4) Allowing the compiler to specialize critical SearchCatCache for a
specific number of attributes allows to unroll loops and avoid
other nkeys dependant initialization.
5) Only initializing the ScanKey when necessary, i.e. catcache misses,
greatly reduces cache unnecessary cpu cache misses.
6) Split of the cache-miss case from the hash lookup, reducing stack
allocations etc in the common case.
7) CatCTup and their corresponding heaptuple are allocated in one
piece.
This results in making cache lookups themselves roughly three times as
fast - full-system benchmarks obviously improve less than that.
I've also evaluated further techniques:
- replace open coded hash with simplehash - the list walk right now
shows up in profiles. Unfortunately it's not easy to do so safely as
an entry's memory location can change at various times, which
doesn't work well with the refcounting and cache invalidation.
- Cacheline-aligning CatCTup entries - helps some with performance,
but the win isn't big and the code for it is ugly, because the
tuples have to be freed as well.
- add more proper functions, rather than macros for
SearchSysCacheCopyN etc., but right now they don't show up in
profiles.
The reason the macro wrapper for syscache.c/h have to be changed,
rather than just catcache, is that doing otherwise would require
exposing the SysCache array to the outside. That might be a good idea
anyway, but it's for another day.
Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20170914061207.zxotvyopetm7lrrp@alap3.anarazel.de
2017-10-13 22:16:50 +02:00
|
|
|
Assert(cacheId >= 0 && cacheId < SysCacheSize &&
|
|
|
|
PointerIsValid(SysCache[cacheId]));
|
2000-01-24 03:12:58 +01:00
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
return SearchCatCache(SysCache[cacheId], key1, key2, key3, key4);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
Improve sys/catcache performance.
The following are the individual improvements:
1) Avoidance of FunctionCallInfo based function calls, replaced by
more efficient functions with a native C argument interface.
2) Don't extract columns from a cache entry's tuple whenever matching
entries - instead store them as a Datum array. This also allows to
get rid of having to build dummy tuples for negative & list
entries, and of a hack for dealing with cstring vs. text weirdness.
3) Reorder members of catcache.h struct, so important entries are more
likely to be on one cacheline.
4) Allowing the compiler to specialize critical SearchCatCache for a
specific number of attributes allows to unroll loops and avoid
other nkeys dependent initialization.
5) Only initializing the ScanKey when necessary, i.e. catcache misses,
greatly reduces unnecessary CPU cache misses.
6) Split of the cache-miss case from the hash lookup, reducing stack
allocations etc in the common case.
7) CatCTup and their corresponding heaptuple are allocated in one
piece.
This results in making cache lookups themselves roughly three times as
fast - full-system benchmarks obviously improve less than that.
I've also evaluated further techniques:
- replace open coded hash with simplehash - the list walk right now
shows up in profiles. Unfortunately it's not easy to do so safely as
an entry's memory location can change at various times, which
doesn't work well with the refcounting and cache invalidation.
- Cacheline-aligning CatCTup entries - helps some with performance,
but the win isn't big and the code for it is ugly, because the
tuples have to be freed as well.
- add more proper functions, rather than macros for
SearchSysCacheCopyN etc., but right now they don't show up in
profiles.
The reason the macro wrapper for syscache.c/h have to be changed,
rather than just catcache, is that doing otherwise would require
exposing the SysCache array to the outside. That might be a good idea
anyway, but it's for another day.
Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20170914061207.zxotvyopetm7lrrp@alap3.anarazel.de
2017-10-13 22:16:50 +02:00
|
|
|
HeapTuple
|
|
|
|
SearchSysCache1(int cacheId,
|
|
|
|
Datum key1)
|
|
|
|
{
|
|
|
|
Assert(cacheId >= 0 && cacheId < SysCacheSize &&
|
|
|
|
PointerIsValid(SysCache[cacheId]));
|
|
|
|
Assert(SysCache[cacheId]->cc_nkeys == 1);
|
|
|
|
|
|
|
|
return SearchCatCache1(SysCache[cacheId], key1);
|
|
|
|
}
|
|
|
|
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCache2(int cacheId,
|
|
|
|
Datum key1, Datum key2)
|
|
|
|
{
|
|
|
|
Assert(cacheId >= 0 && cacheId < SysCacheSize &&
|
|
|
|
PointerIsValid(SysCache[cacheId]));
|
|
|
|
Assert(SysCache[cacheId]->cc_nkeys == 2);
|
|
|
|
|
|
|
|
return SearchCatCache2(SysCache[cacheId], key1, key2);
|
|
|
|
}
|
|
|
|
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCache3(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3)
|
|
|
|
{
|
|
|
|
Assert(cacheId >= 0 && cacheId < SysCacheSize &&
|
|
|
|
PointerIsValid(SysCache[cacheId]));
|
|
|
|
Assert(SysCache[cacheId]->cc_nkeys == 3);
|
|
|
|
|
|
|
|
return SearchCatCache3(SysCache[cacheId], key1, key2, key3);
|
|
|
|
}
|
|
|
|
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCache4(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4)
|
|
|
|
{
|
|
|
|
Assert(cacheId >= 0 && cacheId < SysCacheSize &&
|
|
|
|
PointerIsValid(SysCache[cacheId]));
|
|
|
|
Assert(SysCache[cacheId]->cc_nkeys == 4);
|
|
|
|
|
|
|
|
return SearchCatCache4(SysCache[cacheId], key1, key2, key3, key4);
|
|
|
|
}
|
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
/*
|
|
|
|
* ReleaseSysCache
|
|
|
|
* Release previously grabbed reference count on a tuple
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
ReleaseSysCache(HeapTuple tuple)
|
|
|
|
{
|
|
|
|
ReleaseCatCache(tuple);
|
|
|
|
}
|
1996-07-09 08:22:35 +02:00
|
|
|
|
2000-06-17 06:56:39 +02:00
|
|
|
/*
|
2000-11-16 23:30:52 +01:00
|
|
|
* SearchSysCacheCopy
|
2000-06-17 06:56:39 +02:00
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* A convenience routine that does SearchSysCache and (if successful)
|
|
|
|
* returns a modifiable copy of the syscache entry. The original
|
|
|
|
* syscache entry is released before returning. The caller should
|
|
|
|
* heap_freetuple() the result when done with it.
|
2000-06-17 06:56:39 +02:00
|
|
|
*/
|
|
|
|
HeapTuple
|
2000-11-16 23:30:52 +01:00
|
|
|
SearchSysCacheCopy(int cacheId,
|
|
|
|
Datum key1,
|
|
|
|
Datum key2,
|
|
|
|
Datum key3,
|
|
|
|
Datum key4)
|
2000-06-17 06:56:39 +02:00
|
|
|
{
|
2000-11-16 23:30:52 +01:00
|
|
|
HeapTuple tuple,
|
|
|
|
newtuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCache(cacheId, key1, key2, key3, key4);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return tuple;
|
|
|
|
newtuple = heap_copytuple(tuple);
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return newtuple;
|
2000-06-17 06:56:39 +02:00
|
|
|
}
|
|
|
|
|
2001-08-10 20:57:42 +02:00
|
|
|
/*
|
|
|
|
* SearchSysCacheExists
|
|
|
|
*
|
|
|
|
* A convenience routine that just probes to see if a tuple can be found.
|
|
|
|
* No lock is retained on the syscache entry.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
SearchSysCacheExists(int cacheId,
|
|
|
|
Datum key1,
|
|
|
|
Datum key2,
|
|
|
|
Datum key3,
|
|
|
|
Datum key4)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCache(cacheId, key1, key2, key3, key4);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return false;
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
/*
|
|
|
|
* GetSysCacheOid
|
|
|
|
*
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to merge this
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
* A convenience routine that does SearchSysCache and returns the OID in the
|
|
|
|
* oidcol column of the found tuple, or InvalidOid if no tuple could be found.
|
2000-11-16 23:30:52 +01:00
|
|
|
* No lock is retained on the syscache entry.
|
|
|
|
*/
|
|
|
|
Oid
|
|
|
|
GetSysCacheOid(int cacheId,
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
AttrNumber oidcol,
|
2000-11-16 23:30:52 +01:00
|
|
|
Datum key1,
|
|
|
|
Datum key2,
|
|
|
|
Datum key3,
|
|
|
|
Datum key4)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring an pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
bool isNull;
|
2000-11-16 23:30:52 +01:00
|
|
|
Oid result;
|
|
|
|
|
|
|
|
tuple = SearchSysCache(cacheId, key1, key2, key3, key4);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return InvalidOid;
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to get merge this
now. It's painful to maintain externally, too complicated to commit
after the code code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
result = heap_getattr(tuple, oidcol,
|
|
|
|
SysCache[cacheId]->cc_tupdesc,
|
|
|
|
&isNull);
|
|
|
|
Assert(!isNull); /* columns used as oids should never be NULL */
|
2000-11-16 23:30:52 +01:00
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return result;
|
|
|
|
}
|
2000-06-17 06:56:39 +02:00
|
|
|
|
2002-08-02 20:15:10 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* SearchSysCacheAttName
|
|
|
|
*
|
|
|
|
* This routine is equivalent to SearchSysCache on the ATTNAME cache,
|
|
|
|
* except that it will return NULL if the found attribute is marked
|
|
|
|
* attisdropped. This is convenient for callers that want to act as
|
|
|
|
* though dropped attributes don't exist.
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCacheAttName(Oid relid, const char *attname)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
2010-02-14 19:42:19 +01:00
|
|
|
tuple = SearchSysCache2(ATTNAME,
|
|
|
|
ObjectIdGetDatum(relid),
|
|
|
|
CStringGetDatum(attname));
|
2002-08-02 20:15:10 +02:00
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return NULL;
|
|
|
|
if (((Form_pg_attribute) GETSTRUCT(tuple))->attisdropped)
|
|
|
|
{
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return tuple;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SearchSysCacheCopyAttName
|
|
|
|
*
|
|
|
|
* As above, an attisdropped-aware version of SearchSysCacheCopy.
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCacheCopyAttName(Oid relid, const char *attname)
|
|
|
|
{
|
|
|
|
HeapTuple tuple,
|
|
|
|
newtuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCacheAttName(relid, attname);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return tuple;
|
|
|
|
newtuple = heap_copytuple(tuple);
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return newtuple;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SearchSysCacheExistsAttName
|
|
|
|
*
|
|
|
|
* As above, an attisdropped-aware version of SearchSysCacheExists.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
SearchSysCacheExistsAttName(Oid relid, const char *attname)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCacheAttName(relid, attname);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return false;
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-09-06 22:46:01 +02:00
|
|
|
/*
|
|
|
|
* SearchSysCacheAttNum
|
|
|
|
*
|
|
|
|
* This routine is equivalent to SearchSysCache on the ATTNUM cache,
|
|
|
|
* except that it will return NULL if the found attribute is marked
|
|
|
|
* attisdropped. This is convenient for callers that want to act as
|
|
|
|
* though dropped attributes don't exist.
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCacheAttNum(Oid relid, int16 attnum)
|
|
|
|
{
|
|
|
|
HeapTuple tuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCache2(ATTNUM,
|
|
|
|
ObjectIdGetDatum(relid),
|
|
|
|
Int16GetDatum(attnum));
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return NULL;
|
|
|
|
if (((Form_pg_attribute) GETSTRUCT(tuple))->attisdropped)
|
|
|
|
{
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
return tuple;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SearchSysCacheCopyAttNum
|
|
|
|
*
|
|
|
|
* As above, an attisdropped-aware version of SearchSysCacheCopy.
|
|
|
|
*/
|
|
|
|
HeapTuple
|
|
|
|
SearchSysCacheCopyAttNum(Oid relid, int16 attnum)
|
|
|
|
{
|
|
|
|
HeapTuple tuple,
|
|
|
|
newtuple;
|
|
|
|
|
|
|
|
tuple = SearchSysCacheAttNum(relid, attnum);
|
|
|
|
if (!HeapTupleIsValid(tuple))
|
|
|
|
return NULL;
|
|
|
|
newtuple = heap_copytuple(tuple);
|
|
|
|
ReleaseSysCache(tuple);
|
|
|
|
return newtuple;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2000-01-23 04:43:24 +01:00
|
|
|
* SysCacheGetAttr
|
|
|
|
*
|
2000-11-16 23:30:52 +01:00
|
|
|
* Given a tuple previously fetched by SearchSysCache(),
|
|
|
|
* extract a specific attribute.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2000-01-23 04:43:24 +01:00
|
|
|
* This is equivalent to using heap_getattr() on a tuple fetched
|
|
|
|
* from a non-cached relation. Usually, this is only used for attributes
|
|
|
|
* that could be NULL or variable length; the fixed-size attributes in
|
|
|
|
* a system table are accessed just by mapping the tuple onto the C struct
|
|
|
|
* declarations from include/catalog/.
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2000-01-23 04:43:24 +01:00
|
|
|
* As with heap_getattr(), if the attribute is of a pass-by-reference type
|
|
|
|
* then a pointer into the tuple data area is returned --- the caller must
|
|
|
|
* not modify or pfree the datum!
|
2006-10-06 20:23:35 +02:00
|
|
|
*
|
|
|
|
* Note: it is legal to use SysCacheGetAttr() with a cacheId referencing
|
|
|
|
* a different cache for the same catalog the tuple was fetched from.
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
2000-01-23 04:43:24 +01:00
|
|
|
Datum
|
|
|
|
SysCacheGetAttr(int cacheId, HeapTuple tup,
|
|
|
|
AttrNumber attributeNumber,
|
2000-06-17 06:56:39 +02:00
|
|
|
bool *isNull)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
1999-09-18 21:08:25 +02:00
|
|
|
/*
|
2000-01-23 04:43:24 +01:00
|
|
|
* We just need to get the TupleDesc out of the cache entry, and then we
|
2006-10-06 20:23:35 +02:00
|
|
|
* can apply heap_getattr(). Normally the cache control data is already
|
|
|
|
* valid (because the caller recently fetched the tuple via this same
|
|
|
|
* cache), but there are cases where we have to initialize the cache here.
|
1999-09-18 21:08:25 +02:00
|
|
|
*/
|
2006-10-06 20:23:35 +02:00
|
|
|
if (cacheId < 0 || cacheId >= SysCacheSize ||
|
|
|
|
!PointerIsValid(SysCache[cacheId]))
|
2011-06-18 23:37:30 +02:00
|
|
|
elog(ERROR, "invalid cache ID: %d", cacheId);
|
2006-10-06 20:23:35 +02:00
|
|
|
if (!PointerIsValid(SysCache[cacheId]->cc_tupdesc))
|
|
|
|
{
|
|
|
|
InitCatCachePhase2(SysCache[cacheId], false);
|
|
|
|
Assert(PointerIsValid(SysCache[cacheId]->cc_tupdesc));
|
|
|
|
}
|
2000-01-23 04:43:24 +01:00
|
|
|
|
|
|
|
return heap_getattr(tup, attributeNumber,
|
|
|
|
SysCache[cacheId]->cc_tupdesc,
|
2000-06-17 06:56:39 +02:00
|
|
|
isNull);
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
2002-04-06 08:59:25 +02:00
|
|
|
|
2023-03-25 22:49:33 +01:00
|
|
|
/*
|
|
|
|
* SysCacheGetAttrNotNull
|
|
|
|
*
|
|
|
|
* As above, a version of SysCacheGetAttr which knows that the attr cannot
|
|
|
|
* be NULL.
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
SysCacheGetAttrNotNull(int cacheId, HeapTuple tup,
|
|
|
|
AttrNumber attributeNumber)
|
|
|
|
{
|
|
|
|
bool isnull;
|
|
|
|
Datum attr;
|
|
|
|
|
|
|
|
attr = SysCacheGetAttr(cacheId, tup, attributeNumber, &isnull);
|
|
|
|
|
|
|
|
if (isnull)
|
|
|
|
{
|
|
|
|
elog(ERROR,
|
|
|
|
"unexpected null value in cached tuple for catalog %s column %s",
|
|
|
|
get_rel_name(cacheinfo[cacheId].reloid),
|
|
|
|
NameStr(TupleDescAttr(SysCache[cacheId]->cc_tupdesc, attributeNumber - 1)->attname));
|
|
|
|
}
|
|
|
|
|
|
|
|
return attr;
|
|
|
|
}
|
|
|
|
|
2012-03-07 20:51:13 +01:00
|
|
|
/*
|
|
|
|
* GetSysCacheHashValue
|
|
|
|
*
|
|
|
|
* Get the hash value that would be used for a tuple in the specified cache
|
|
|
|
* with the given search keys.
|
|
|
|
*
|
|
|
|
* The reason for exposing this as part of the API is that the hash value is
|
|
|
|
* exposed in cache invalidation operations, so there are places outside the
|
|
|
|
* catcache code that need to be able to compute the hash values.
|
|
|
|
*/
|
|
|
|
uint32
|
|
|
|
GetSysCacheHashValue(int cacheId,
|
|
|
|
Datum key1,
|
|
|
|
Datum key2,
|
|
|
|
Datum key3,
|
|
|
|
Datum key4)
|
|
|
|
{
|
|
|
|
if (cacheId < 0 || cacheId >= SysCacheSize ||
|
|
|
|
!PointerIsValid(SysCache[cacheId]))
|
|
|
|
elog(ERROR, "invalid cache ID: %d", cacheId);
|
|
|
|
|
|
|
|
return GetCatCacheHashValue(SysCache[cacheId], key1, key2, key3, key4);
|
|
|
|
}
|
|
|
|
|
2002-04-06 08:59:25 +02:00
|
|
|
/*
|
|
|
|
* List-search interface
|
|
|
|
*/
|
|
|
|
struct catclist *
|
|
|
|
SearchSysCacheList(int cacheId, int nkeys,
|
2018-01-29 21:13:07 +01:00
|
|
|
Datum key1, Datum key2, Datum key3)
|
2002-04-06 08:59:25 +02:00
|
|
|
{
|
|
|
|
if (cacheId < 0 || cacheId >= SysCacheSize ||
|
|
|
|
!PointerIsValid(SysCache[cacheId]))
|
2011-06-18 23:37:30 +02:00
|
|
|
elog(ERROR, "invalid cache ID: %d", cacheId);
|
2002-04-06 08:59:25 +02:00
|
|
|
|
|
|
|
return SearchCatCacheList(SysCache[cacheId], nkeys,
|
2018-01-29 21:13:07 +01:00
|
|
|
key1, key2, key3);
|
2002-04-06 08:59:25 +02:00
|
|
|
}
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue that has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
2017-05-13 00:17:29 +02:00
|
|
|
/*
|
|
|
|
* SysCacheInvalidate
|
|
|
|
*
|
|
|
|
* Invalidate entries in the specified cache, given a hash value.
|
|
|
|
* See CatCacheInvalidate() for more info.
|
|
|
|
*
|
|
|
|
* This routine is only quasi-public: it should only be used by inval.c.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
SysCacheInvalidate(int cacheId, uint32 hashValue)
|
|
|
|
{
|
|
|
|
if (cacheId < 0 || cacheId >= SysCacheSize)
|
|
|
|
elog(ERROR, "invalid cache ID: %d", cacheId);
|
|
|
|
|
|
|
|
/* if this cache isn't initialized yet, no need to do anything */
|
|
|
|
if (!PointerIsValid(SysCache[cacheId]))
|
|
|
|
return;
|
|
|
|
|
|
|
|
CatCacheInvalidate(SysCache[cacheId], hashValue);
|
|
|
|
}
|
|
|
|
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
/*
|
|
|
|
* Certain relations that do not have system caches send snapshot invalidation
|
|
|
|
* messages in lieu of catcache messages. This is for the benefit of
|
|
|
|
* GetCatalogSnapshot(), which can then reuse its existing MVCC snapshot
|
|
|
|
* for scanning one of those catalogs, rather than taking a new one, if no
|
|
|
|
* invalidation has been received.
|
|
|
|
*
|
|
|
|
* Relations that have syscaches need not (and must not) be listed here. The
|
|
|
|
* catcache invalidation messages will also flush the snapshot. If you add a
|
|
|
|
* syscache for one of these relations, remove it from this list.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
RelationInvalidatesSnapshotsOnly(Oid relid)
|
|
|
|
{
|
|
|
|
switch (relid)
|
|
|
|
{
|
|
|
|
case DbRoleSettingRelationId:
|
|
|
|
case DependRelationId:
|
|
|
|
case SharedDependRelationId:
|
|
|
|
case DescriptionRelationId:
|
|
|
|
case SharedDescriptionRelationId:
|
|
|
|
case SecLabelRelationId:
|
|
|
|
case SharedSecLabelRelationId:
|
|
|
|
return true;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Test whether a relation has a system cache.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
RelationHasSysCache(Oid relid)
|
|
|
|
{
|
|
|
|
int low = 0,
|
|
|
|
high = SysCacheRelationOidSize - 1;
|
|
|
|
|
|
|
|
while (low <= high)
|
|
|
|
{
|
|
|
|
int middle = low + (high - low) / 2;
|
|
|
|
|
|
|
|
if (SysCacheRelationOid[middle] == relid)
|
|
|
|
return true;
|
|
|
|
if (SysCacheRelationOid[middle] < relid)
|
|
|
|
low = middle + 1;
|
|
|
|
else
|
|
|
|
high = middle - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/*
|
|
|
|
* Test whether a relation supports a system cache, ie it is either a
|
|
|
|
* cached table or the index used for a cache.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
RelationSupportsSysCache(Oid relid)
|
|
|
|
{
|
|
|
|
int low = 0,
|
|
|
|
high = SysCacheSupportingRelOidSize - 1;
|
|
|
|
|
|
|
|
while (low <= high)
|
|
|
|
{
|
|
|
|
int middle = low + (high - low) / 2;
|
|
|
|
|
|
|
|
if (SysCacheSupportingRelOid[middle] == relid)
|
|
|
|
return true;
|
|
|
|
if (SysCacheSupportingRelOid[middle] < relid)
|
|
|
|
low = middle + 1;
|
|
|
|
else
|
|
|
|
high = middle - 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
|
|
|
/*
|
2024-02-16 18:37:50 +01:00
|
|
|
* OID comparator for qsort
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
oid_compare(const void *a, const void *b)
|
|
|
|
{
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
Oid oa = *((const Oid *) a);
|
|
|
|
Oid ob = *((const Oid *) b);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
2024-02-16 21:05:36 +01:00
|
|
|
return pg_cmp_u32(oa, ob);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
}
|