postgresql/src/include/access/transam.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

381 lines
12 KiB
C
Raw Normal View History

/*-------------------------------------------------------------------------
*
* transam.h
* postgres transaction access method support code
*
*
* Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
2010-09-20 22:08:53 +02:00
* src/include/access/transam.h
*
*-------------------------------------------------------------------------
*/
#ifndef TRANSAM_H
#define TRANSAM_H
#include "access/xlogdefs.h"
/* ----------------
* Special transaction ID values
*
* BootstrapTransactionId is the XID for "bootstrap" operations, and
* FrozenTransactionId is used for very old tuples. Both should
* always be considered valid.
*
* FirstNormalTransactionId is the first "normal" transaction id.
* Note: if you need to change it, you must change pg_class.h as well.
* ----------------
*/
#define InvalidTransactionId ((TransactionId) 0)
#define BootstrapTransactionId ((TransactionId) 1)
#define FrozenTransactionId ((TransactionId) 2)
#define FirstNormalTransactionId ((TransactionId) 3)
#define MaxTransactionId ((TransactionId) 0xFFFFFFFF)
/* ----------------
* transaction ID manipulation macros
* ----------------
*/
#define TransactionIdIsValid(xid) ((xid) != InvalidTransactionId)
#define TransactionIdIsNormal(xid) ((xid) >= FirstNormalTransactionId)
#define TransactionIdEquals(id1, id2) ((id1) == (id2))
#define TransactionIdStore(xid, dest) (*(dest) = (xid))
#define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId)
#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32))
#define XidFromFullTransactionId(x) ((uint32) (x).value)
#define U64FromFullTransactionId(x) ((x).value)
#define FullTransactionIdEquals(a, b) ((a).value == (b).value)
#define FullTransactionIdPrecedes(a, b) ((a).value < (b).value)
#define FullTransactionIdPrecedesOrEquals(a, b) ((a).value <= (b).value)
#define FullTransactionIdFollows(a, b) ((a).value > (b).value)
#define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value)
#define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x))
#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId)
#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId)
#define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId)
/*
* A 64 bit value that contains an epoch and a TransactionId. This is
* wrapped in a struct to prevent implicit conversion to/from TransactionId.
* Not all values represent valid normal XIDs.
*/
typedef struct FullTransactionId
{
uint64 value;
} FullTransactionId;
static inline FullTransactionId
FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
{
FullTransactionId result;
result.value = ((uint64) epoch) << 32 | xid;
return result;
}
static inline FullTransactionId
FullTransactionIdFromU64(uint64 value)
{
FullTransactionId result;
result.value = value;
return result;
}
/* advance a transaction ID variable, handling wraparound correctly */
#define TransactionIdAdvance(dest) \
do { \
(dest)++; \
if ((dest) < FirstNormalTransactionId) \
(dest) = FirstNormalTransactionId; \
} while(0)
/*
* Retreat a FullTransactionId variable, stepping over xids that would appear
* to be special only when viewed as 32bit XIDs.
*/
static inline void
FullTransactionIdRetreat(FullTransactionId *dest)
{
dest->value--;
/*
* In contrast to 32bit XIDs don't step over the "actual" special xids.
* For 64bit xids these can't be reached as part of a wraparound as they
* can in the 32bit case.
*/
if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId))
return;
/*
* But we do need to step over XIDs that'd appear special only for 32bit
* XIDs.
*/
while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId)
dest->value--;
}
snapshot scalability: Don't compute global horizons while building snapshots. To make GetSnapshotData() more scalable, it cannot not look at at each proc's xmin: While snapshot contents do not need to change whenever a read-only transaction commits or a snapshot is released, a proc's xmin is modified in those cases. The frequency of xmin modifications leads to, particularly on higher core count systems, many cache misses inside GetSnapshotData(), despite the data underlying a snapshot not changing. That is the most significant source of GetSnapshotData() scaling poorly on larger systems. Without accessing xmins, GetSnapshotData() cannot calculate accurate horizons / thresholds as it has so far. But we don't really have to: The horizons don't actually change that much between GetSnapshotData() calls. Nor are the horizons actually used every time a snapshot is built. The trick this commit introduces is to delay computation of accurate horizons until there use and using horizon boundaries to determine whether accurate horizons need to be computed. The use of RecentGlobal[Data]Xmin to decide whether a row version could be removed has been replaces with new GlobalVisTest* functions. These use two thresholds to determine whether a row can be pruned: 1) definitely_needed, indicating that rows deleted by XIDs >= definitely_needed are definitely still visible. 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can definitely be removed GetSnapshotData() updates definitely_needed to be the xmin of the computed snapshot. When testing whether a row can be removed (with GlobalVisTestIsRemovableXid()) and the tested XID falls in between the two (i.e. XID >= maybe_needed && XID < definitely_needed) the boundaries can be recomputed to be more accurate. As it is not cheap to compute accurate boundaries, we limit the number of times that happens in short succession. As the boundaries used by GlobalVisTestIsRemovableXid() are never reset (with maybe_needed updated by GetSnapshotData()), it is likely that further test can benefit from an earlier computation of accurate horizons. To avoid regressing performance when old_snapshot_threshold is set (as that requires an accurate horizon to be computed), heap_page_prune_opt() doesn't unconditionally call TransactionIdLimitedForOldSnapshots() anymore. Both the computation of the limited horizon, and the triggering of errors (with SetOldSnapshotThresholdTimestamp()) is now only done when necessary to remove tuples. This commit just removes the accesses to PGXACT->xmin from GetSnapshotData(), but other members of PGXACT residing in the same cache line are accessed. Therefore this in itself does not result in a significant improvement. Subsequent commits will take advantage of the fact that GetSnapshotData() now does not need to access xmins anymore. Note: This contains a workaround in heap_page_prune_opt() to keep the snapshot_too_old tests working. While that workaround is ugly, the tests currently are not meaningful, and it seems best to address them separately. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Robert Haas <robertmhaas@gmail.com> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: David Rowley <dgrowleyml@gmail.com> Discussion: https://postgr.es/m/20200301083601.ews6hz5dduc3w2se@alap3.anarazel.de
2020-08-13 01:03:49 +02:00
/*
* Advance a FullTransactionId variable, stepping over xids that would appear
* to be special only when viewed as 32bit XIDs.
*/
static inline void
FullTransactionIdAdvance(FullTransactionId *dest)
{
dest->value++;
/* see FullTransactionIdAdvance() */
if (FullTransactionIdPrecedes(*dest, FirstNormalFullTransactionId))
return;
while (XidFromFullTransactionId(*dest) < FirstNormalTransactionId)
dest->value++;
}
/* back up a transaction ID variable, handling wraparound correctly */
#define TransactionIdRetreat(dest) \
do { \
(dest)--; \
} while ((dest) < FirstNormalTransactionId)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdPrecedes(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
(int32) ((id1) - (id2)) < 0)
/* compare two XIDs already known to be normal; this is a macro for speed */
#define NormalTransactionIdFollows(id1, id2) \
(AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \
(int32) ((id1) - (id2)) > 0)
/* ----------
* Object ID (OID) zero is InvalidOid.
*
* OIDs 1-9999 are reserved for manual assignment (see .dat files in
* src/include/catalog/). Of these, 8000-9999 are reserved for
* development purposes (such as in-progress patches and forks);
* they should not appear in released versions.
*
* OIDs 10000-11999 are reserved for assignment by genbki.pl, for use
* when the .dat files in src/include/catalog/ do not specify an OID
* for a catalog entry that requires one. Note that genbki.pl assigns
* these OIDs independently in each catalog, so they're not guaranteed
* to be globally unique. Furthermore, the bootstrap backend and
* initdb's post-bootstrap processing can also assign OIDs in this range.
* The normal OID-generation logic takes care of any OID conflicts that
* might arise from that.
*
* OIDs 12000-16383 are reserved for unpinned objects created by initdb's
* post-bootstrap processing. initdb forces the OID generator up to
* 12000 as soon as it's made the pinned objects it's responsible for.
*
* OIDs beginning at 16384 are assigned from the OID generator
* during normal multiuser operation. (We force the generator up to
* 16384 as soon as we are in normal operation.)
*
* The choices of 8000, 10000 and 12000 are completely arbitrary, and can be
* moved if we run low on OIDs in any category. Changing the macros below,
* and updating relevant documentation (see bki.sgml and RELEASE_CHANGES),
* should be sufficient to do this. Moving the 16384 boundary between
* initdb-assigned OIDs and user-defined objects would be substantially
* more painful, however, since some user-defined OIDs will appear in
* on-disk data; such a change would probably break pg_upgrade.
*
* NOTE: if the OID generator wraps around, we skip over OIDs 0-16383
* and resume with 16384. This minimizes the odds of OID conflict, by not
* reassigning OIDs that might have been assigned during initdb. Critically,
* it also ensures that no user-created object will be considered pinned.
* ----------
*/
#define FirstGenbkiObjectId 10000
#define FirstUnpinnedObjectId 12000
#define FirstNormalObjectId 16384
pg_upgrade: Preserve database OIDs. Commit 9a974cbcba005256a19991203583a94b4f9a21a9 arranged to preserve relfilenodes and tablespace OIDs. For similar reasons, also arrange to preserve database OIDs. One problem is that, up until now, the OIDs assigned to the template0 and postgres databases have not been fixed. This could be a problem when upgrading, because pg_upgrade might try to migrate a database from the old cluster to the new cluster while keeping the OID and find a different database with that OID, resulting in a failure. If it finds a database with the same name and the same OID that's OK: it will be dropped and recreated. But the same OID and a different name is a problem. To prevent that, fix the OIDs for postgres and template0 to specific values less than 16384. To avoid running afoul of this rule, these values should not be changed in future releases. It's not a problem that these OIDs aren't fixed in existing releases, because the OIDs that we're assigning here weren't used for either of these databases in any previous release. Thus, there's no chance that an upgrade of a cluster from any previous release will collide with the OIDs we're assigning here. And going forward, the OIDs will always be fixed, so the only potential collision is with a system database having the same name and the same OID, which is OK. This patch lets users assign a specific OID to a database as well, provided however that it can't be less than 16384. I (rhaas) thought it might be better not to expose this capability to users, but the consensus was otherwise, so the syntax is documented. Letting users assign OIDs below 16384 would not be OK, though, because a user-created database with a low-numbered OID might collide with a system-created database in a future release. We therefore prohibit that. Shruthi KC, based on an earlier patch from Antonin Houska, reviewed and with some adjustments by me. Discussion: http://postgr.es/m/CA+TgmoYgTwYcUmB=e8+hRHOFA0kkS6Kde85+UNdon6q7bt1niQ@mail.gmail.com Discussion: http://postgr.es/m/CAASxf_Mnwm1Dh2vd5FAhVX6S1nwNSZUB1z12VddYtM++H2+p7w@mail.gmail.com
2022-01-24 20:23:15 +01:00
/* OIDs of Template0 and Postgres database are fixed */
#define Template0ObjectId 4
#define PostgresObjectId 5
/*
* VariableCache is a data structure in shared memory that is used to track
* OID and XID assignment state. For largely historical reasons, there is
* just one struct with different fields that are protected by different
* LWLocks.
*
* Note: xidWrapLimit and oldestXidDB are not "active" values, but are
* used just to generate useful messages when xidWarnLimit or xidStopLimit
* are exceeded.
*/
typedef struct VariableCacheData
{
/*
* These fields are protected by OidGenLock.
*/
Oid nextOid; /* next OID to assign */
uint32 oidCount; /* OIDs available before must do XLOG work */
/*
* These fields are protected by XidGenLock.
*/
FullTransactionId nextXid; /* next XID to assign */
TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */
TransactionId xidVacLimit; /* start forcing autovacuums here */
TransactionId xidWarnLimit; /* start complaining here */
TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */
TransactionId xidWrapLimit; /* where the world ends */
Oid oldestXidDB; /* database with minimum datfrozenxid */
/*
* These fields are protected by CommitTsLock
*/
TransactionId oldestCommitTsXid;
TransactionId newestCommitTsXid;
/*
* These fields are protected by ProcArrayLock.
*/
FullTransactionId latestCompletedXid; /* newest full XID that has
* committed or aborted */
/*
* Number of top-level transactions with xids (i.e. which may have
* modified the database) that completed in some form since the start of
* the server. This currently is solely used to check whether
* GetSnapshotData() needs to recompute the contents of the snapshot, or
* not. There are likely other users of this. Always above 1.
*/
uint64 xactCompletionCount;
/*
* These fields are protected by XactTruncationLock
*/
TransactionId oldestClogXid; /* oldest it's safe to look up in clog */
} VariableCacheData;
typedef VariableCacheData *VariableCache;
/* ----------------
* extern declarations
* ----------------
*/
Allow read only connections during recovery, known as Hot Standby. Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record. New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far. This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required. Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit. Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
/* in transam/xact.c */
extern bool TransactionStartedDuringRecovery(void);
/* in transam/varsup.c */
extern PGDLLIMPORT VariableCache ShmemVariableCache;
/*
* prototypes for functions in transam/transam.c
*/
extern bool TransactionIdDidCommit(TransactionId transactionId);
extern bool TransactionIdDidAbort(TransactionId transactionId);
extern bool TransactionIdIsKnownCompleted(TransactionId transactionId);
extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids);
extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn);
extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids);
extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2);
extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2);
extern bool TransactionIdFollows(TransactionId id1, TransactionId id2);
extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2);
extern TransactionId TransactionIdLatest(TransactionId mainxid,
int nxids, const TransactionId *xids);
extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid);
/* in transam/varsup.c */
extern FullTransactionId GetNewTransactionId(bool isSubXact);
extern void AdvanceNextFullTransactionIdPastXid(TransactionId xid);
extern FullTransactionId ReadNextFullTransactionId(void);
extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
Oid oldest_datoid);
extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid);
extern bool ForceTransactionIdLimitUpdate(void);
extern Oid GetNewObjectId(void);
extern void StopGeneratingPinnedObjectIds(void);
#ifdef USE_ASSERT_CHECKING
extern void AssertTransactionIdInAllowableRange(TransactionId xid);
#else
#define AssertTransactionIdInAllowableRange(xid) ((void)true)
#endif
/*
* Some frontend programs include this header. For compilers that emit static
* inline functions even when they're unused, that leads to unsatisfied
* external references; hence hide them with #ifndef FRONTEND.
*/
#ifndef FRONTEND
/*
* For callers that just need the XID part of the next transaction ID.
*/
static inline TransactionId
ReadNextTransactionId(void)
{
return XidFromFullTransactionId(ReadNextFullTransactionId());
}
snapshot scalability: Don't compute global horizons while building snapshots. To make GetSnapshotData() more scalable, it cannot not look at at each proc's xmin: While snapshot contents do not need to change whenever a read-only transaction commits or a snapshot is released, a proc's xmin is modified in those cases. The frequency of xmin modifications leads to, particularly on higher core count systems, many cache misses inside GetSnapshotData(), despite the data underlying a snapshot not changing. That is the most significant source of GetSnapshotData() scaling poorly on larger systems. Without accessing xmins, GetSnapshotData() cannot calculate accurate horizons / thresholds as it has so far. But we don't really have to: The horizons don't actually change that much between GetSnapshotData() calls. Nor are the horizons actually used every time a snapshot is built. The trick this commit introduces is to delay computation of accurate horizons until there use and using horizon boundaries to determine whether accurate horizons need to be computed. The use of RecentGlobal[Data]Xmin to decide whether a row version could be removed has been replaces with new GlobalVisTest* functions. These use two thresholds to determine whether a row can be pruned: 1) definitely_needed, indicating that rows deleted by XIDs >= definitely_needed are definitely still visible. 2) maybe_needed, indicating that rows deleted by XIDs < maybe_needed can definitely be removed GetSnapshotData() updates definitely_needed to be the xmin of the computed snapshot. When testing whether a row can be removed (with GlobalVisTestIsRemovableXid()) and the tested XID falls in between the two (i.e. XID >= maybe_needed && XID < definitely_needed) the boundaries can be recomputed to be more accurate. As it is not cheap to compute accurate boundaries, we limit the number of times that happens in short succession. As the boundaries used by GlobalVisTestIsRemovableXid() are never reset (with maybe_needed updated by GetSnapshotData()), it is likely that further test can benefit from an earlier computation of accurate horizons. To avoid regressing performance when old_snapshot_threshold is set (as that requires an accurate horizon to be computed), heap_page_prune_opt() doesn't unconditionally call TransactionIdLimitedForOldSnapshots() anymore. Both the computation of the limited horizon, and the triggering of errors (with SetOldSnapshotThresholdTimestamp()) is now only done when necessary to remove tuples. This commit just removes the accesses to PGXACT->xmin from GetSnapshotData(), but other members of PGXACT residing in the same cache line are accessed. Therefore this in itself does not result in a significant improvement. Subsequent commits will take advantage of the fact that GetSnapshotData() now does not need to access xmins anymore. Note: This contains a workaround in heap_page_prune_opt() to keep the snapshot_too_old tests working. While that workaround is ugly, the tests currently are not meaningful, and it seems best to address them separately. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Robert Haas <robertmhaas@gmail.com> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Reviewed-By: David Rowley <dgrowleyml@gmail.com> Discussion: https://postgr.es/m/20200301083601.ews6hz5dduc3w2se@alap3.anarazel.de
2020-08-13 01:03:49 +02:00
/* return transaction ID backed up by amount, handling wraparound correctly */
static inline TransactionId
TransactionIdRetreatedBy(TransactionId xid, uint32 amount)
{
xid -= amount;
while (xid < FirstNormalTransactionId)
xid--;
return xid;
}
/* return the older of the two IDs */
static inline TransactionId
TransactionIdOlder(TransactionId a, TransactionId b)
{
if (!TransactionIdIsValid(a))
return b;
if (!TransactionIdIsValid(b))
return a;
if (TransactionIdPrecedes(a, b))
return a;
return b;
}
/* return the older of the two IDs, assuming they're both normal */
static inline TransactionId
NormalTransactionIdOlder(TransactionId a, TransactionId b)
{
Assert(TransactionIdIsNormal(a));
Assert(TransactionIdIsNormal(b));
if (NormalTransactionIdPrecedes(a, b))
return a;
return b;
}
/* return the newer of the two IDs */
static inline FullTransactionId
FullTransactionIdNewer(FullTransactionId a, FullTransactionId b)
{
if (!FullTransactionIdIsValid(a))
return b;
if (!FullTransactionIdIsValid(b))
return a;
if (FullTransactionIdFollows(a, b))
return a;
return b;
}
#endif /* FRONTEND */
#endif /* TRANSAM_H */