postgresql/src/include/utils/rel.h

462 lines
15 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* rel.h
* POSTGRES relation descriptor (a/k/a relcache entry) definitions.
*
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
2010-09-20 22:08:53 +02:00
* src/include/utils/rel.h
*
*-------------------------------------------------------------------------
*/
#ifndef REL_H
#define REL_H
#include "access/tupdesc.h"
1999-07-16 19:07:40 +02:00
#include "catalog/pg_am.h"
#include "catalog/pg_class.h"
#include "catalog/pg_index.h"
#include "fmgr.h"
#include "nodes/bitmapset.h"
#include "rewrite/prs2lock.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
#include "utils/relcache.h"
#include "utils/reltrigger.h"
/*
* LockRelId and LockInfo really belong to lmgr.h, but it's more convenient
* to declare them here so we can have a LockInfoData field in a Relation.
*/
typedef struct LockRelId
{
Oid relId; /* a relation identifier */
Oid dbId; /* a database identifier */
} LockRelId;
typedef struct LockInfoData
{
LockRelId lockRelId;
} LockInfoData;
typedef LockInfoData *LockInfo;
/*
* Cached lookup information for the frequently used index access method
* functions, defined by the pg_am row associated with an index relation.
*/
typedef struct RelationAmInfo
{
FmgrInfo aminsert;
FmgrInfo ambeginscan;
FmgrInfo amgettuple;
FmgrInfo amgetbitmap;
FmgrInfo amrescan;
FmgrInfo amendscan;
FmgrInfo ammarkpos;
FmgrInfo amrestrpos;
FmgrInfo amcanreturn;
} RelationAmInfo;
/*
* Here are the contents of a relation cache entry.
*/
typedef struct RelationData
{
RelFileNode rd_node; /* relation physical identifier */
/* use "struct" here to avoid needing to include smgr.h: */
2004-08-29 07:07:03 +02:00
struct SMgrRelationData *rd_smgr; /* cached file handle, or NULL */
int rd_refcnt; /* reference count */
BackendId rd_backend; /* owning backend id, if temporary relation */
bool rd_islocaltemp; /* rel is a temp rel of this session */
bool rd_isnailed; /* rel is nailed in cache */
bool rd_isvalid; /* relcache entry is valid */
2005-10-15 04:49:52 +02:00
char rd_indexvalid; /* state of rd_indexlist: 0 = not valid, 1 =
* valid, 2 = temporarily forced */
2004-08-29 07:07:03 +02:00
/*
* rd_createSubid is the ID of the highest subtransaction the rel has
2005-10-15 04:49:52 +02:00
* survived into; or zero if the rel was not created in the current top
* transaction. This can be now be relied on, whereas previously it could
* be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
* the ID of the highest subtransaction the relfilenode change has
* survived into, or zero if not changed in the current transaction (or we
* have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
* when a relation has multiple new relfilenodes within a single
* transaction, with one of them occuring in a subsequently aborted
* subtransaction, e.g. BEGIN; TRUNCATE t; SAVEPOINT save; TRUNCATE t;
* ROLLBACK TO save; -- rd_newRelfilenode is now forgotten
*/
SubTransactionId rd_createSubid; /* rel was created in current xact */
SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in
* current xact */
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
Oid rd_id; /* relation's object id */
List *rd_indexlist; /* list of OIDs of indexes on relation */
Bitmapset *rd_indexattr; /* identifies columns used in indexes */
Improve concurrency of foreign key locking This patch introduces two additional lock modes for tuples: "SELECT FOR KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each other, in contrast with already existing "SELECT FOR SHARE" and "SELECT FOR UPDATE". UPDATE commands that do not modify the values stored in the columns that are part of the key of the tuple now grab a SELECT FOR NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently with tuple locks of the FOR KEY SHARE variety. Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this means the concurrency improvement applies to them, which is the whole point of this patch. The added tuple lock semantics require some rejiggering of the multixact module, so that the locking level that each transaction is holding can be stored alongside its Xid. Also, multixacts now need to persist across server restarts and crashes, because they can now represent not only tuple locks, but also tuple updates. This means we need more careful tracking of lifetime of pg_multixact SLRU files; since they now persist longer, we require more infrastructure to figure out when they can be removed. pg_upgrade also needs to be careful to copy pg_multixact files over from the old server to the new, or at least part of multixact.c state, depending on the versions of the old and new servers. Tuple time qualification rules (HeapTupleSatisfies routines) need to be careful not to consider tuples with the "is multi" infomask bit set as being only locked; they might need to look up MultiXact values (i.e. possibly do pg_multixact I/O) to find out the Xid that updated a tuple, whereas they previously were assured to only use information readily available from the tuple header. This is considered acceptable, because the extra I/O would involve cases that would previously cause some commands to block waiting for concurrent transactions to finish. Another important change is the fact that locking tuples that have previously been updated causes the future versions to be marked as locked, too; this is essential for correctness of foreign key checks. This causes additional WAL-logging, also (there was previously a single WAL record for a locked tuple; now there are as many as updated copies of the tuple there exist.) With all this in place, contention related to tuples being checked by foreign key rules should be much reduced. As a bonus, the old behavior that a subtransaction grabbing a stronger tuple lock than the parent (sub)transaction held on a given tuple and later aborting caused the weaker lock to be lost, has been fixed. Many new spec files were added for isolation tester framework, to ensure overall behavior is sane. There's probably room for several more tests. There were several reviewers of this patch; in particular, Noah Misch and Andres Freund spent considerable time in it. Original idea for the patch came from Simon Riggs, after a problem report by Joel Jacobson. Most code is from me, with contributions from Marti Raudsepp, Alexander Shulgin, Noah Misch and Andres Freund. This patch was discussed in several pgsql-hackers threads; the most important start at the following message-ids: AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com 1290721684-sup-3951@alvh.no-ip.org 1294953201-sup-2099@alvh.no-ip.org 1320343602-sup-2290@alvh.no-ip.org 1339690386-sup-8927@alvh.no-ip.org 4FE5FF020200002500048A3D@gw.wicourts.gov 4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */
Oid rd_oidindex; /* OID of unique index on OID, if any */
LockInfoData rd_lockInfo; /* lock mgr's info for locking relation */
RuleLock *rd_rules; /* rewrite rules */
MemoryContext rd_rulescxt; /* private memory cxt for rd_rules, if any */
TriggerDesc *trigdesc; /* Trigger info, or NULL if rel has none */
/*
* The index chosen as the relation's replication identity or
* InvalidOid. Only set correctly if RelationGetIndexList has been
* called/rd_indexvalid > 0.
*/
Oid rd_replidindex;
/*
* rd_options is set whenever rd_rel is loaded into the relcache entry.
2006-10-04 02:30:14 +02:00
* Note that you can NOT look into rd_rel for this data. NULL means "use
* defaults".
*/
bytea *rd_options; /* parsed pg_class.reloptions */
/* These are non-NULL only for an index relation: */
Form_pg_index rd_index; /* pg_index tuple describing this index */
/* use "struct" here to avoid needing to include htup.h: */
2003-08-04 02:43:34 +02:00
struct HeapTupleData *rd_indextuple; /* all of pg_index tuple */
Form_pg_am rd_am; /* pg_am tuple for index's AM */
/*
* index access support info (used only for an index relation)
*
* Note: only default support procs for each opclass are cached, namely
2011-04-10 17:42:00 +02:00
* those with lefttype and righttype equal to the opclass's opcintype. The
* arrays are indexed by support function number, which is a sufficient
* identifier given that restriction.
*
* Note: rd_amcache is available for index AMs to cache private data about
* an index. This must be just a cache since it may get reset at any time
* (in particular, it will get reset by a relcache inval message for the
2006-10-04 02:30:14 +02:00
* index). If used, it must point to a single memory chunk palloc'd in
* rd_indexcxt. A relcache reset will include freeing that chunk and
* setting rd_amcache = NULL.
*/
MemoryContext rd_indexcxt; /* private memory cxt for this stuff */
RelationAmInfo *rd_aminfo; /* lookup info for funcs found in pg_am */
Oid *rd_opfamily; /* OIDs of op families for each index col */
Oid *rd_opcintype; /* OIDs of opclass declared input data types */
RegProcedure *rd_support; /* OIDs of support procedures */
2005-10-15 04:49:52 +02:00
FmgrInfo *rd_supportinfo; /* lookup info for support procedures */
int16 *rd_indoption; /* per-column AM-specific flags */
List *rd_indexprs; /* index expression trees, if any */
List *rd_indpred; /* index predicate tree, if any */
Oid *rd_exclops; /* OIDs of exclusion operators, if any */
Oid *rd_exclprocs; /* OIDs of exclusion ops' procs, if any */
uint16 *rd_exclstrats; /* exclusion ops' strategy numbers, if any */
void *rd_amcache; /* available for use by index AM */
2011-04-10 17:42:00 +02:00
Oid *rd_indcollation; /* OIDs of index collations */
/*
* foreign-table support
*
* rd_fdwroutine must point to a single memory chunk palloc'd in
* CacheMemoryContext. It will be freed and reset to NULL on a relcache
* reset.
*/
/* use "struct" here to avoid needing to include fdwapi.h: */
struct FdwRoutine *rd_fdwroutine; /* cached function pointers, or NULL */
/*
* Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
* version of a table, we need to make any toast pointers inserted into it
* have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new
Fix CLUSTER/VACUUM FULL for toast values owned by recently-updated rows. In commit 7b0d0e9356963d5c3e4d329a917f5fbb82a2ef05, I made CLUSTER and VACUUM FULL try to preserve toast value OIDs from the original toast table to the new one. However, if we have to copy both live and recently-dead versions of a row that has a toasted column, those versions may well reference the same toast value with the same OID. The patch then led to duplicate-key failures as we tried to insert the toast value twice with the same OID. (The previous behavior was not very desirable either, since it would have silently inserted the same value twice with different OIDs. That wastes space, but what's worse is that the toast values inserted for already-dead heap rows would not be reclaimed by subsequent ordinary VACUUMs, since they go into the new toast table marked live not deleted.) To fix, check if the copied OID already exists in the new toast table, and if so, assume that it stores the desired value. This is reasonably safe since the only case where we will copy an OID from a previous toast pointer is when toast_insert_or_update was given that toast pointer and so we just pulled the data from the old table; if we got two different values that way then we have big problems anyway. We do have to assume that no other backend is inserting items into the new toast table concurrently, but that's surely safe for CLUSTER and VACUUM FULL. Per bug #6393 from Maxim Boguk. Back-patch to 9.0, same as the previous patch.
2012-01-12 22:40:14 +01:00
* version of the main heap, not the toast table itself.) This also
* causes toast_save_datum() to try to preserve toast value OIDs.
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
/* use "struct" here to avoid needing to include pgstat.h: */
2007-11-15 22:14:46 +01:00
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
/*
* StdRdOptions
* Standard contents of rd_options for heaps and generic indexes.
*
* RelationGetFillFactor() and RelationGetTargetPageFreeSpace() can only
* be applied to relations that use this format or a superset for
* private options data.
*/
/* autovacuum-related reloptions. */
typedef struct AutoVacOpts
{
bool enabled;
int vacuum_threshold;
int analyze_threshold;
int vacuum_cost_delay;
int vacuum_cost_limit;
int freeze_min_age;
int freeze_max_age;
int freeze_table_age;
float8 vacuum_scale_factor;
float8 analyze_scale_factor;
} AutoVacOpts;
typedef struct StdRdOptions
{
int32 vl_len_; /* varlena header (do not touch directly!) */
2006-10-04 02:30:14 +02:00
int fillfactor; /* page fill factor in percent (0..100) */
AutoVacOpts autovacuum; /* autovacuum-related options */
bool security_barrier; /* for views */
int check_option_offset; /* for views */
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10
#define HEAP_DEFAULT_FILLFACTOR 100
/*
* RelationGetFillFactor
* Returns the relation's fillfactor. Note multiple eval of argument!
*/
#define RelationGetFillFactor(relation, defaultff) \
((relation)->rd_options ? \
((StdRdOptions *) (relation)->rd_options)->fillfactor : (defaultff))
/*
* RelationGetTargetPageUsage
* Returns the relation's desired space usage per page in bytes.
*/
#define RelationGetTargetPageUsage(relation, defaultff) \
(BLCKSZ * RelationGetFillFactor(relation, defaultff) / 100)
/*
* RelationGetTargetPageFreeSpace
* Returns the relation's desired freespace per page in bytes.
*/
#define RelationGetTargetPageFreeSpace(relation, defaultff) \
(BLCKSZ * (100 - RelationGetFillFactor(relation, defaultff)) / 100)
/*
* RelationIsSecurityView
* Returns whether the relation is security view, or not
*/
#define RelationIsSecurityView(relation) \
((relation)->rd_options ? \
((StdRdOptions *) (relation)->rd_options)->security_barrier : false)
/*
* RelationHasCheckOption
* Returns true if the relation is a view defined with either the local
* or the cascaded check option.
*/
#define RelationHasCheckOption(relation) \
((relation)->rd_options && \
((StdRdOptions *) (relation)->rd_options)->check_option_offset != 0)
/*
* RelationHasLocalCheckOption
* Returns true if the relation is a view defined with the local check
* option.
*/
#define RelationHasLocalCheckOption(relation) \
((relation)->rd_options && \
((StdRdOptions *) (relation)->rd_options)->check_option_offset != 0 ? \
strcmp((char *) (relation)->rd_options + \
((StdRdOptions *) (relation)->rd_options)->check_option_offset, \
"local") == 0 : false)
/*
* RelationHasCascadedCheckOption
* Returns true if the relation is a view defined with the cascaded check
* option.
*/
#define RelationHasCascadedCheckOption(relation) \
((relation)->rd_options && \
((StdRdOptions *) (relation)->rd_options)->check_option_offset != 0 ? \
strcmp((char *) (relation)->rd_options + \
((StdRdOptions *) (relation)->rd_options)->check_option_offset, \
"cascaded") == 0 : false)
/*
1999-05-25 18:15:34 +02:00
* RelationIsValid
* True iff relation descriptor is valid.
*/
#define RelationIsValid(relation) PointerIsValid(relation)
#define InvalidRelation ((Relation) NULL)
/*
1999-05-25 18:15:34 +02:00
* RelationHasReferenceCountZero
* True iff relation reference count is zero.
*
* Note:
* Assumes relation descriptor is valid.
*/
#define RelationHasReferenceCountZero(relation) \
((bool)((relation)->rd_refcnt == 0))
/*
1999-05-25 18:15:34 +02:00
* RelationGetForm
* Returns pg_class tuple for a relation.
*
* Note:
* Assumes relation descriptor is valid.
*/
1998-09-01 05:29:17 +02:00
#define RelationGetForm(relation) ((relation)->rd_rel)
/*
1999-05-25 18:15:34 +02:00
* RelationGetRelid
* Returns the OID of the relation
*/
#define RelationGetRelid(relation) ((relation)->rd_id)
/*
* RelationGetNumberOfAttributes
* Returns the number of attributes in a relation.
*/
#define RelationGetNumberOfAttributes(relation) ((relation)->rd_rel->relnatts)
/*
1999-05-25 18:15:34 +02:00
* RelationGetDescr
* Returns tuple descriptor for a relation.
*/
1998-09-01 05:29:17 +02:00
#define RelationGetDescr(relation) ((relation)->rd_att)
/*
* RelationGetRelationName
* Returns the rel's name.
*
* Note that the name is only unique within the containing namespace.
*/
#define RelationGetRelationName(relation) \
(NameStr((relation)->rd_rel->relname))
/*
* RelationGetNamespace
* Returns the rel's namespace OID.
*/
#define RelationGetNamespace(relation) \
((relation)->rd_rel->relnamespace)
/*
* RelationIsMapped
* True if the relation uses the relfilenode map.
*
* NB: this is only meaningful for relkinds that have storage, else it
* will misleadingly say "true".
*/
#define RelationIsMapped(relation) \
((relation)->rd_rel->relfilenode == InvalidOid)
/*
* RelationOpenSmgr
* Open the relation at the smgr level, if not already done.
*/
#define RelationOpenSmgr(relation) \
do { \
if ((relation)->rd_smgr == NULL) \
smgrsetowner(&((relation)->rd_smgr), smgropen((relation)->rd_node, (relation)->rd_backend)); \
} while (0)
/*
* RelationCloseSmgr
* Close the relation at the smgr level, if not already done.
*
* Note: smgrclose should unhook from owner pointer, hence the Assert.
*/
#define RelationCloseSmgr(relation) \
do { \
if ((relation)->rd_smgr != NULL) \
{ \
smgrclose((relation)->rd_smgr); \
Assert((relation)->rd_smgr == NULL); \
} \
} while (0)
/*
* RelationGetTargetBlock
* Fetch relation's current insertion target block.
*
2010-02-26 03:01:40 +01:00
* Returns InvalidBlockNumber if there is no current target block. Note
* that the target block status is discarded on any smgr-level invalidation.
*/
#define RelationGetTargetBlock(relation) \
( (relation)->rd_smgr != NULL ? (relation)->rd_smgr->smgr_targblock : InvalidBlockNumber )
/*
* RelationSetTargetBlock
* Set relation's current insertion target block.
*/
#define RelationSetTargetBlock(relation, targblock) \
do { \
RelationOpenSmgr(relation); \
(relation)->rd_smgr->smgr_targblock = (targblock); \
} while (0)
/*
* RelationNeedsWAL
* True if relation needs WAL.
*/
#define RelationNeedsWAL(relation) \
((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
/*
* RelationUsesLocalBuffers
* True if relation's pages are stored in local buffers.
*/
#define RelationUsesLocalBuffers(relation) \
((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
/*
* RELATION_IS_LOCAL
* If a rel is either temp or newly created in the current transaction,
* it can be assumed to be accessible only to the current backend.
* This is typically used to decide that we can skip acquiring locks.
*
* Beware of multiple eval of argument
*/
#define RELATION_IS_LOCAL(relation) \
((relation)->rd_islocaltemp || \
(relation)->rd_createSubid != InvalidSubTransactionId)
/*
* RELATION_IS_OTHER_TEMP
* Test for a temporary relation that belongs to some other session.
*
* Beware of multiple eval of argument
*/
#define RELATION_IS_OTHER_TEMP(relation) \
((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP && \
!(relation)->rd_islocaltemp)
/*
* RelationIsScannable
* Currently can only be false for a materialized view which has not been
* populated by its query. This is likely to get more complicated later,
* so use a macro which looks like a function.
*/
#define RelationIsScannable(relation) ((relation)->rd_rel->relispopulated)
/*
* RelationIsPopulated
* Currently, we don't physically distinguish the "populated" and
* "scannable" properties of matviews, but that may change later.
* Hence, use the appropriate one of these macros in code tests.
*/
#define RelationIsPopulated(relation) ((relation)->rd_rel->relispopulated)
/* routines in utils/cache/relcache.c */
extern void RelationIncrementReferenceCount(Relation rel);
extern void RelationDecrementReferenceCount(Relation rel);
#endif /* REL_H */