1996-08-28 03:59:28 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* syscache.h
|
1996-08-28 03:59:28 +02:00
|
|
|
* System catalog cache definitions.
|
|
|
|
*
|
1999-08-09 05:13:31 +02:00
|
|
|
* See also lsyscache.h, which provides convenience routines for
|
|
|
|
* common cache-lookup operations.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2022-01-08 01:04:57 +01:00
|
|
|
* Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/utils/syscache.h
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef SYSCACHE_H
|
|
|
|
#define SYSCACHE_H
|
|
|
|
|
2012-08-29 00:26:24 +02:00
|
|
|
#include "access/attnum.h"
|
|
|
|
#include "access/htup.h"
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
/* we intentionally do not include utils/catcache.h here */
|
1996-10-19 06:16:04 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
/*
|
|
|
|
* SysCache identifiers.
|
|
|
|
*
|
2008-05-07 03:04:49 +02:00
|
|
|
* The order of these identifiers must match the order
|
|
|
|
* of the entries in the array cacheinfo[] in syscache.c.
|
|
|
|
* Keep them in alphabetical order (renumbering only costs a
|
|
|
|
* backend rebuild).
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
|
|
|
|
2008-05-07 03:04:49 +02:00
|
|
|
enum SysCacheIdentifier
|
|
|
|
{
|
|
|
|
AGGFNOID = 0,
|
|
|
|
AMNAME,
|
|
|
|
AMOID,
|
|
|
|
AMOPOPID,
|
|
|
|
AMOPSTRATEGY,
|
|
|
|
AMPROCNUM,
|
|
|
|
ATTNAME,
|
|
|
|
ATTNUM,
|
|
|
|
AUTHMEMMEMROLE,
|
|
|
|
AUTHMEMROLEMEM,
|
|
|
|
AUTHNAME,
|
|
|
|
AUTHOID,
|
|
|
|
CASTSOURCETARGET,
|
|
|
|
CLAAMNAMENSP,
|
|
|
|
CLAOID,
|
2011-02-08 22:04:18 +01:00
|
|
|
COLLNAMEENCNSP,
|
|
|
|
COLLOID,
|
2008-05-07 03:04:49 +02:00
|
|
|
CONDEFAULT,
|
|
|
|
CONNAMENSP,
|
|
|
|
CONSTROID,
|
|
|
|
CONVOID,
|
|
|
|
DATABASEOID,
|
2009-10-05 21:24:49 +02:00
|
|
|
DEFACLROLENSPOBJ,
|
2008-05-07 03:04:49 +02:00
|
|
|
ENUMOID,
|
|
|
|
ENUMTYPOIDNAME,
|
2012-07-18 16:16:16 +02:00
|
|
|
EVENTTRIGGERNAME,
|
|
|
|
EVENTTRIGGEROID,
|
2008-12-19 17:25:19 +01:00
|
|
|
FOREIGNDATAWRAPPERNAME,
|
|
|
|
FOREIGNDATAWRAPPEROID,
|
|
|
|
FOREIGNSERVERNAME,
|
|
|
|
FOREIGNSERVEROID,
|
2011-01-02 05:48:11 +01:00
|
|
|
FOREIGNTABLEREL,
|
2008-05-07 03:04:49 +02:00
|
|
|
INDEXRELID,
|
|
|
|
LANGNAME,
|
|
|
|
LANGOID,
|
|
|
|
NAMESPACENAME,
|
|
|
|
NAMESPACEOID,
|
|
|
|
OPERNAMENSP,
|
|
|
|
OPEROID,
|
|
|
|
OPFAMILYAMNAMENSP,
|
|
|
|
OPFAMILYOID,
|
Implement table partitioning.
Table partitioning is like table inheritance and reuses much of the
existing infrastructure, but there are some important differences.
The parent is called a partitioned table and is always empty; it may
not have indexes or non-inherited constraints, since those make no
sense for a relation with no data of its own. The children are called
partitions and contain all of the actual data. Each partition has an
implicit partitioning constraint. Multiple inheritance is not
allowed, and partitioning and inheritance can't be mixed. Partitions
can't have extra columns and may not allow nulls unless the parent
does. Tuples inserted into the parent are automatically routed to the
correct partition, so tuple-routing ON INSERT triggers are not needed.
Tuple routing isn't yet supported for partitions which are foreign
tables, and it doesn't handle updates that cross partition boundaries.
Currently, tables can be range-partitioned or list-partitioned. List
partitioning is limited to a single column, but range partitioning can
involve multiple columns. A partitioning "column" can be an
expression.
Because table partitioning is less general than table inheritance, it
is hoped that it will be easier to reason about properties of
partitions, and therefore that this will serve as a better foundation
for a variety of possible optimizations, including query planner
optimizations. The tuple routing which this patch does based on
the implicit partitioning constraints is an example of this, but it
seems likely that many other useful optimizations are also possible.
Amit Langote, reviewed and tested by Robert Haas, Ashutosh Bapat,
Amit Kapila, Rajkumar Raghuwanshi, Corey Huinker, Jaime Casanova,
Rushabh Lathia, Erik Rijkers, among others. Minor revisions by me.
2016-12-07 19:17:43 +01:00
|
|
|
PARTRELID,
|
2008-05-07 03:04:49 +02:00
|
|
|
PROCNAMEARGSNSP,
|
|
|
|
PROCOID,
|
2017-05-31 00:47:10 +02:00
|
|
|
PUBLICATIONNAME,
|
Allow publishing the tables of schema.
A new option "FOR ALL TABLES IN SCHEMA" in Create/Alter Publication allows
one or more schemas to be specified, whose tables are selected by the
publisher for sending the data to the subscriber.
The new syntax allows specifying both the tables and schemas. For example:
CREATE PUBLICATION pub1 FOR TABLE t1,t2,t3, ALL TABLES IN SCHEMA s1,s2;
OR
ALTER PUBLICATION pub1 ADD TABLE t1,t2,t3, ALL TABLES IN SCHEMA s1,s2;
A new system table "pg_publication_namespace" has been added, to maintain
the schemas that the user wants to publish through the publication.
Modified the output plugin (pgoutput) to publish the changes if the
relation is part of schema publication.
Updates pg_dump to identify and dump schema publications. Updates the \d
family of commands to display schema publications and \dRp+ variant will
now display associated schemas if any.
Author: Vignesh C, Hou Zhijie, Amit Kapila
Syntax-Suggested-by: Tom Lane, Alvaro Herrera
Reviewed-by: Greg Nancarrow, Masahiko Sawada, Hou Zhijie, Amit Kapila, Haiying Tang, Ajin Cherian, Rahila Syed, Bharath Rupireddy, Mark Dilger
Tested-by: Haiying Tang
Discussion: https://www.postgresql.org/message-id/CALDaNm0OANxuJ6RXqwZsM1MSY4s19nuH3734j4a72etDwvBETQ@mail.gmail.com
2021-10-27 04:14:52 +02:00
|
|
|
PUBLICATIONNAMESPACE,
|
|
|
|
PUBLICATIONNAMESPACEMAP,
|
2017-05-31 00:47:10 +02:00
|
|
|
PUBLICATIONOID,
|
|
|
|
PUBLICATIONREL,
|
|
|
|
PUBLICATIONRELMAP,
|
Multirange datatypes
Multiranges are basically sorted arrays of non-overlapping ranges with
set-theoretic operations defined over them.
Since v14, each range type automatically gets a corresponding multirange
datatype. There are both manual and automatic mechanisms for naming multirange
types. One can specify a multirange type name using the multirange_type_name
attribute in CREATE TYPE. Otherwise, a multirange type name is generated
automatically. If the range type name contains "range" then we change that to
"multirange". Otherwise, we add "_multirange" to the end.
Implementation of multiranges comes with a space-efficient internal
representation format, which evades extra paddings and duplicated storage of
oids. Altogether this format allows fetching a particular range by its index
in O(n).
Statistic gathering and selectivity estimation are implemented for multiranges.
For this purpose, stored multirange is approximated as union range without gaps.
This field will likely need improvements in the future.
Catversion is bumped.
Discussion: https://postgr.es/m/CALNJ-vSUpQ_Y%3DjXvTxt1VYFztaBSsWVXeF1y6gTYQ4bOiWDLgQ%40mail.gmail.com
Discussion: https://postgr.es/m/a0b8026459d1e6167933be2104a6174e7d40d0ab.camel%40j-davis.com#fe7218c83b08068bfffb0c5293eceda0
Author: Paul Jungwirth, revised by me
Reviewed-by: David Fetter, Corey Huinker, Jeff Davis, Pavel Stehule
Reviewed-by: Alvaro Herrera, Tom Lane, Isaac Morland, David G. Johnston
Reviewed-by: Zhihong Yu, Alexander Korotkov
2020-12-20 05:20:33 +01:00
|
|
|
RANGEMULTIRANGE,
|
2011-11-03 12:16:28 +01:00
|
|
|
RANGETYPE,
|
2008-05-07 03:04:49 +02:00
|
|
|
RELNAMENSP,
|
|
|
|
RELOID,
|
Introduce replication progress tracking infrastructure.
When implementing a replication solution ontop of logical decoding, two
related problems exist:
* How to safely keep track of replication progress
* How to change replication behavior, based on the origin of a row;
e.g. to avoid loops in bi-directional replication setups
The solution to these problems, as implemented here, consist out of
three parts:
1) 'replication origins', which identify nodes in a replication setup.
2) 'replication progress tracking', which remembers, for each
replication origin, how far replay has progressed in an efficient and
crash safe manner.
3) The ability to filter out changes performed on the behest of a
replication origin during logical decoding; this allows complex
replication topologies. E.g. by filtering all replayed changes out.
Most of this could also be implemented in "userspace", e.g. by inserting
additional rows containing origin information, but that ends up being much
less efficient and more complicated. We don't want to require various
replication solutions to reimplement logic for this independently. The
infrastructure is intended to be generic enough to be reusable.
This infrastructure also replaces the 'nodeid' infrastructure of commit
timestamps. It is intended to provide all the former capabilities,
except that there's only 2^16 different origins; but now they integrate
with logical decoding. Additionally more functionality is accessible via
SQL. Since the commit timestamp infrastructure has also been introduced
in 9.5 (commit 73c986add) changing the API is not a problem.
For now the number of origins for which the replication progress can be
tracked simultaneously is determined by the max_replication_slots
GUC. That GUC is not a perfect match to configure this, but there
doesn't seem to be sufficient reason to introduce a separate new one.
Bumps both catversion and wal page magic.
Author: Andres Freund, with contributions from Petr Jelinek and Craig Ringer
Reviewed-By: Heikki Linnakangas, Petr Jelinek, Robert Haas, Steve Singer
Discussion: 20150216002155.GI15326@awork2.anarazel.de,
20140923182422.GA15776@alap3.anarazel.de,
20131114172632.GE7522@alap2.anarazel.de
2015-04-29 19:30:53 +02:00
|
|
|
REPLORIGIDENT,
|
|
|
|
REPLORIGNAME,
|
2008-05-07 03:04:49 +02:00
|
|
|
RULERELNAME,
|
2016-12-20 18:00:00 +01:00
|
|
|
SEQRELID,
|
Rework the pg_statistic_ext catalog
Since extended statistic got introduced in PostgreSQL 10, there was a
single catalog pg_statistic_ext storing both the definitions and built
statistic. That's however problematic when a user is supposed to have
access only to the definitions, but not to user data.
Consider for example pg_dump on a database with RLS enabled - if the
pg_statistic_ext catalog respects RLS (which it should, if it contains
user data), pg_dump would not see any records and the result would not
define any extended statistics. That would be a surprising behavior.
Until now this was not a pressing issue, because the existing types of
extended statistic (functional dependencies and ndistinct coefficients)
do not include any user data directly. This changed with introduction
of MCV lists, which do include most common combinations of values.
The easiest way to fix this is to split the pg_statistic_ext catalog
into two - one for definitions, one for the built statistic values.
The new catalog is called pg_statistic_ext_data, and we're maintaining
a 1:1 relationship with the old catalog - either there are matching
records in both catalogs, or neither of them.
Bumped CATVERSION due to changing system catalog definitions.
Author: Dean Rasheed, with improvements by me
Reviewed-by: Dean Rasheed, John Naylor
Discussion: https://postgr.es/m/CAEZATCUhT9rt7Ui%3DVdx4N%3D%3DVV5XOK5dsXfnGgVOz_JhAicB%3DZA%40mail.gmail.com
2019-06-13 17:19:21 +02:00
|
|
|
STATEXTDATASTXOID,
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations than individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 18:06:10 +01:00
|
|
|
STATEXTNAMENSP,
|
|
|
|
STATEXTOID,
|
2009-12-29 21:11:45 +01:00
|
|
|
STATRELATTINH,
|
2017-01-19 18:00:00 +01:00
|
|
|
SUBSCRIPTIONNAME,
|
2017-05-31 00:47:10 +02:00
|
|
|
SUBSCRIPTIONOID,
|
2017-03-23 13:36:36 +01:00
|
|
|
SUBSCRIPTIONRELMAP,
|
2010-01-05 22:54:00 +01:00
|
|
|
TABLESPACEOID,
|
2015-04-26 16:33:14 +02:00
|
|
|
TRFOID,
|
|
|
|
TRFTYPELANG,
|
2008-05-07 03:04:49 +02:00
|
|
|
TSCONFIGMAP,
|
|
|
|
TSCONFIGNAMENSP,
|
|
|
|
TSCONFIGOID,
|
|
|
|
TSDICTNAMENSP,
|
|
|
|
TSDICTOID,
|
|
|
|
TSPARSERNAMENSP,
|
|
|
|
TSPARSEROID,
|
|
|
|
TSTEMPLATENAMENSP,
|
|
|
|
TSTEMPLATEOID,
|
|
|
|
TYPENAMENSP,
|
2008-12-19 17:25:19 +01:00
|
|
|
TYPEOID,
|
|
|
|
USERMAPPINGOID,
|
|
|
|
USERMAPPINGUSERSERVER
|
2017-05-13 01:05:13 +02:00
|
|
|
|
|
|
|
#define SysCacheSize (USERMAPPINGUSERSERVER + 1)
|
2008-05-07 03:04:49 +02:00
|
|
|
};
|
1996-08-28 03:59:28 +02:00
|
|
|
|
|
|
|
extern void InitCatalogCache(void);
|
2002-02-19 21:11:20 +01:00
|
|
|
extern void InitCatalogCachePhase2(void);
|
2000-11-16 23:30:52 +01:00
|
|
|
|
|
|
|
extern HeapTuple SearchSysCache(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
Improve sys/catcache performance.
The following are the individual improvements:
1) Avoidance of FunctionCallInfo based function calls, replaced by
more efficient functions with a native C argument interface.
2) Don't extract columns from a cache entry's tuple whenever matching
entries - instead store them as a Datum array. This also allows to
get rid of having to build dummy tuples for negative & list
entries, and of a hack for dealing with cstring vs. text weirdness.
3) Reorder members of catcache.h struct, so important entries are more
likely to be on one cacheline.
4) Allowing the compiler to specialize critical SearchCatCache for a
specific number of attributes allows to unroll loops and avoid
other nkeys-dependent initialization.
5) Only initializing the ScanKey when necessary, i.e. catcache misses,
greatly reduces unnecessary cpu cache misses.
6) Split of the cache-miss case from the hash lookup, reducing stack
allocations etc in the common case.
7) CatCTup and their corresponding heaptuple are allocated in one
piece.
This results in making cache lookups themselves roughly three times as
fast - full-system benchmarks obviously improve less than that.
I've also evaluated further techniques:
- replace open coded hash with simplehash - the list walk right now
shows up in profiles. Unfortunately it's not easy to do so safely as
an entry's memory location can change at various times, which
doesn't work well with the refcounting and cache invalidation.
- Cacheline-aligning CatCTup entries - helps some with performance,
but the win isn't big and the code for it is ugly, because the
tuples have to be freed as well.
- add more proper functions, rather than macros for
SearchSysCacheCopyN etc., but right now they don't show up in
profiles.
The reason the macro wrapper for syscache.c/h have to be changed,
rather than just catcache, is that doing otherwise would require
exposing the SysCache array to the outside. That might be a good idea
anyway, but it's for another day.
Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20170914061207.zxotvyopetm7lrrp@alap3.anarazel.de
2017-10-13 22:16:50 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The use of argument-specific numbers is encouraged. They're faster, and
|
|
|
|
* insulate the caller from changes in the maximum number of keys.
|
|
|
|
*/
|
|
|
|
extern HeapTuple SearchSysCache1(int cacheId,
|
|
|
|
Datum key1);
|
|
|
|
extern HeapTuple SearchSysCache2(int cacheId,
|
|
|
|
Datum key1, Datum key2);
|
|
|
|
extern HeapTuple SearchSysCache3(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3);
|
|
|
|
extern HeapTuple SearchSysCache4(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
|
|
|
|
2000-11-16 23:30:52 +01:00
|
|
|
extern void ReleaseSysCache(HeapTuple tuple);
|
|
|
|
|
|
|
|
/* convenience routines */
|
|
|
|
extern HeapTuple SearchSysCacheCopy(int cacheId,
|
1998-08-19 04:04:17 +02:00
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
2001-08-10 20:57:42 +02:00
|
|
|
extern bool SearchSysCacheExists(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to merge this
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
extern Oid GetSysCacheOid(int cacheId, AttrNumber oidcol,
|
2000-11-16 23:30:52 +01:00
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
|
|
|
|
2002-08-02 20:15:10 +02:00
|
|
|
extern HeapTuple SearchSysCacheAttName(Oid relid, const char *attname);
|
|
|
|
extern HeapTuple SearchSysCacheCopyAttName(Oid relid, const char *attname);
|
|
|
|
extern bool SearchSysCacheExistsAttName(Oid relid, const char *attname);
|
|
|
|
|
2017-09-06 22:46:01 +02:00
|
|
|
extern HeapTuple SearchSysCacheAttNum(Oid relid, int16 attnum);
|
|
|
|
extern HeapTuple SearchSysCacheCopyAttNum(Oid relid, int16 attnum);
|
|
|
|
|
2000-01-23 04:43:24 +01:00
|
|
|
extern Datum SysCacheGetAttr(int cacheId, HeapTuple tup,
|
2000-06-17 06:56:39 +02:00
|
|
|
AttrNumber attributeNumber, bool *isNull);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2012-03-07 20:51:13 +01:00
|
|
|
extern uint32 GetSysCacheHashValue(int cacheId,
|
|
|
|
Datum key1, Datum key2, Datum key3, Datum key4);
|
|
|
|
|
2002-04-06 08:59:25 +02:00
|
|
|
/* list-search interface. Users of this must import catcache.h too */
|
2012-08-29 00:26:24 +02:00
|
|
|
struct catclist;
|
2002-04-06 08:59:25 +02:00
|
|
|
extern struct catclist *SearchSysCacheList(int cacheId, int nkeys,
|
2018-01-29 21:13:07 +01:00
|
|
|
Datum key1, Datum key2, Datum key3);
|
2002-09-04 22:31:48 +02:00
|
|
|
|
2017-05-13 00:17:29 +02:00
|
|
|
extern void SysCacheInvalidate(int cacheId, uint32 hashValue);
|
|
|
|
|
Use a safer method for determining whether relcache init file is stale.
When we invalidate the relcache entry for a system catalog or index, we
must also delete the relcache "init file" if the init file contains a copy
of that rel's entry. The old way of doing this relied on a specially
maintained list of the OIDs of relations present in the init file: we made
the list either when reading the file in, or when writing the file out.
The problem is that when writing the file out, we included only rels
present in our local relcache, which might have already suffered some
deletions due to relcache inval events. In such cases we correctly decided
not to overwrite the real init file with incomplete data --- but we still
used the incomplete initFileRelationIds list for the rest of the current
session. This could result in wrong decisions about whether the session's
own actions require deletion of the init file, potentially allowing an init
file created by some other concurrent session to be left around even though
it's been made stale.
Since we don't support changing the schema of a system catalog at runtime,
the only likely scenario in which this would cause a problem in the field
involves a "vacuum full" on a catalog concurrently with other activity, and
even then it's far from easy to provoke. Remarkably, this has been broken
since 2002 (in commit 786340441706ac1957a031f11ad1c2e5b6e18314), but we had
never seen a reproducible test case until recently. If it did happen in
the field, the symptoms would probably involve unexpected "cache lookup
failed" errors to begin with, then "could not open file" failures after the
next checkpoint, as all accesses to the affected catalog stopped working.
Recovery would require manually removing the stale "pg_internal.init" file.
To fix, get rid of the initFileRelationIds list, and instead consult
syscache.c's list of relations used in catalog caches to decide whether a
relation is included in the init file. This should be a tad more efficient
anyway, since we're replacing linear search of a list with ~100 entries
with a binary search. It's a bit ugly that the init file contents are now
so directly tied to the catalog caches, but in practice that won't make
much difference.
Back-patch to all supported branches.
2015-06-07 21:32:09 +02:00
|
|
|
extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
|
|
|
|
extern bool RelationHasSysCache(Oid relid);
|
|
|
|
extern bool RelationSupportsSysCache(Oid relid);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
|
2010-02-14 19:42:19 +01:00
|
|
|
/*
|
|
|
|
* The use of the macros below rather than direct calls to the corresponding
|
|
|
|
* functions is encouraged, as it insulates the caller from changes in the
|
|
|
|
* maximum number of keys.
|
|
|
|
*/
|
|
|
|
#define SearchSysCacheCopy1(cacheId, key1) \
|
|
|
|
SearchSysCacheCopy(cacheId, key1, 0, 0, 0)
|
|
|
|
#define SearchSysCacheCopy2(cacheId, key1, key2) \
|
|
|
|
SearchSysCacheCopy(cacheId, key1, key2, 0, 0)
|
|
|
|
#define SearchSysCacheCopy3(cacheId, key1, key2, key3) \
|
|
|
|
SearchSysCacheCopy(cacheId, key1, key2, key3, 0)
|
|
|
|
#define SearchSysCacheCopy4(cacheId, key1, key2, key3, key4) \
|
|
|
|
SearchSysCacheCopy(cacheId, key1, key2, key3, key4)
|
|
|
|
|
|
|
|
#define SearchSysCacheExists1(cacheId, key1) \
|
|
|
|
SearchSysCacheExists(cacheId, key1, 0, 0, 0)
|
|
|
|
#define SearchSysCacheExists2(cacheId, key1, key2) \
|
|
|
|
SearchSysCacheExists(cacheId, key1, key2, 0, 0)
|
|
|
|
#define SearchSysCacheExists3(cacheId, key1, key2, key3) \
|
|
|
|
SearchSysCacheExists(cacheId, key1, key2, key3, 0)
|
|
|
|
#define SearchSysCacheExists4(cacheId, key1, key2, key3, key4) \
|
|
|
|
SearchSysCacheExists(cacheId, key1, key2, key3, key4)
|
|
|
|
|
Remove WITH OIDS support, change oid catalog column visibility.
Previously tables declared WITH OIDS, including a significant fraction
of the catalog tables, stored the oid column not as a normal column,
but as part of the tuple header.
This special column was not shown by default, which was somewhat odd,
as it's often (consider e.g. pg_class.oid) one of the more important
parts of a row. Neither pg_dump nor COPY included the contents of the
oid column by default.
The fact that the oid column was not an ordinary column necessitated a
significant amount of special case code to support oid columns. That
already was painful for the existing, but upcoming work aiming to make
table storage pluggable, would have required expanding and duplicating
that "specialness" significantly.
WITH OIDS has been deprecated since 2005 (commit ff02d0a05280e0).
Remove it.
Removing includes:
- CREATE TABLE and ALTER TABLE syntax for declaring the table to be
WITH OIDS has been removed (WITH (oids[ = true]) will error out)
- pg_dump does not support dumping tables declared WITH OIDS and will
issue a warning when dumping one (and ignore the oid column).
- restoring a pg_dump archive with pg_restore will warn when
restoring a table with oid contents (and ignore the oid column)
- COPY will refuse to load binary dump that includes oids.
- pg_upgrade will error out when encountering tables declared WITH
OIDS, they have to be altered to remove the oid column first.
- Functionality to access the oid of the last inserted row (like
plpgsql's RESULT_OID, spi's SPI_lastoid, ...) has been removed.
The syntax for declaring a table WITHOUT OIDS (or WITH (oids = false)
for CREATE TABLE) is still supported. While that requires a bit of
support code, it seems unnecessary to break applications / dumps that
do not use oids, and are explicit about not using them.
The biggest user of WITH OID columns was postgres' catalog. This
commit changes all 'magic' oid columns to be columns that are normally
declared and stored. To reduce unnecessary query breakage all the
newly added columns are still named 'oid', even if a table's column
naming scheme would indicate 'reloid' or such. This obviously
requires adapting a lot of code, mostly replacing oid access via
HeapTupleGetOid() with access to the underlying Form_pg_*->oid column.
The bootstrap process now assigns oids for all oid columns in
genbki.pl that do not have an explicit value (starting at the largest
oid previously used), only oids assigned later by oids will be above
FirstBootstrapObjectId. As the oid column now is a normal column the
special bootstrap syntax for oids has been removed.
Oids are not automatically assigned during insertion anymore, all
backend code explicitly assigns oids with GetNewOidWithIndex(). For
the rare case that insertions into the catalog via SQL are called for
the new pg_nextoid() function can be used (which only works on catalog
tables).
The fact that oid columns on system tables are now normal columns
means that they will be included in the set of columns expanded
by * (i.e. SELECT * FROM pg_class will now include the table's oid,
previously it did not). It'd not technically be hard to hide oid
column by default, but that'd mean confusing behavior would either
have to be carried forward forever, or it'd cause breakage down the
line.
While it's not unlikely that further adjustments are needed, the
scope/invasiveness of the patch makes it worthwhile to merge this
now. It's painful to maintain externally, too complicated to commit
after the code freeze, and a dependency of a number of other
patches.
Catversion bump, for obvious reasons.
Author: Andres Freund, with contributions by John Naylor
Discussion: https://postgr.es/m/20180930034810.ywp2c7awz7opzcfr@alap3.anarazel.de
2018-11-21 00:36:57 +01:00
|
|
|
#define GetSysCacheOid1(cacheId, oidcol, key1) \
|
|
|
|
GetSysCacheOid(cacheId, oidcol, key1, 0, 0, 0)
|
|
|
|
#define GetSysCacheOid2(cacheId, oidcol, key1, key2) \
|
|
|
|
GetSysCacheOid(cacheId, oidcol, key1, key2, 0, 0)
|
|
|
|
#define GetSysCacheOid3(cacheId, oidcol, key1, key2, key3) \
|
|
|
|
GetSysCacheOid(cacheId, oidcol, key1, key2, key3, 0)
|
|
|
|
#define GetSysCacheOid4(cacheId, oidcol, key1, key2, key3, key4) \
|
|
|
|
GetSysCacheOid(cacheId, oidcol, key1, key2, key3, key4)
|
2010-02-14 19:42:19 +01:00
|
|
|
|
2012-03-07 20:51:13 +01:00
|
|
|
#define GetSysCacheHashValue1(cacheId, key1) \
|
|
|
|
GetSysCacheHashValue(cacheId, key1, 0, 0, 0)
|
|
|
|
#define GetSysCacheHashValue2(cacheId, key1, key2) \
|
|
|
|
GetSysCacheHashValue(cacheId, key1, key2, 0, 0)
|
|
|
|
#define GetSysCacheHashValue3(cacheId, key1, key2, key3) \
|
|
|
|
GetSysCacheHashValue(cacheId, key1, key2, key3, 0)
|
|
|
|
#define GetSysCacheHashValue4(cacheId, key1, key2, key3, key4) \
|
|
|
|
GetSysCacheHashValue(cacheId, key1, key2, key3, key4)
|
|
|
|
|
2010-02-14 19:42:19 +01:00
|
|
|
#define SearchSysCacheList1(cacheId, key1) \
|
2018-01-29 21:13:07 +01:00
|
|
|
SearchSysCacheList(cacheId, 1, key1, 0, 0)
|
2010-02-14 19:42:19 +01:00
|
|
|
#define SearchSysCacheList2(cacheId, key1, key2) \
|
2018-01-29 21:13:07 +01:00
|
|
|
SearchSysCacheList(cacheId, 2, key1, key2, 0)
|
2010-02-14 19:42:19 +01:00
|
|
|
#define SearchSysCacheList3(cacheId, key1, key2, key3) \
|
2018-01-29 21:13:07 +01:00
|
|
|
SearchSysCacheList(cacheId, 3, key1, key2, key3)
|
2010-02-14 19:42:19 +01:00
|
|
|
|
2002-04-06 08:59:25 +02:00
|
|
|
#define ReleaseSysCacheList(x) ReleaseCatCacheList(x)
|
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
#endif /* SYSCACHE_H */
|