1998-01-25 06:04:21 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* heapam.h
|
1997-09-07 07:04:48 +02:00
|
|
|
* POSTGRES heap access method definitions.
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*
|
2014-01-07 22:05:30 +01:00
|
|
|
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/access/heapam.h
|
1996-08-27 23:50:29 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
1997-09-07 07:04:48 +02:00
|
|
|
#ifndef HEAPAM_H
|
1996-08-27 23:50:29 +02:00
|
|
|
#define HEAPAM_H
|
|
|
|
|
2002-05-21 01:51:44 +02:00
|
|
|
#include "access/sdir.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "access/skey.h"
|
2011-09-04 07:13:16 +02:00
|
|
|
#include "nodes/primnodes.h"
|
2012-08-30 22:15:44 +02:00
|
|
|
#include "storage/bufpage.h"
|
2011-09-04 07:13:16 +02:00
|
|
|
#include "storage/lock.h"
|
2008-06-19 02:46:06 +02:00
|
|
|
#include "utils/relcache.h"
|
2008-03-26 22:10:39 +01:00
|
|
|
#include "utils/snapshot.h"
|
1996-08-27 23:50:29 +02:00
|
|
|
|
|
|
|
|
2008-11-06 21:51:15 +01:00
|
|
|
/* "options" flag bits for heap_insert */
|
|
|
|
#define HEAP_INSERT_SKIP_WAL 0x0001
|
|
|
|
#define HEAP_INSERT_SKIP_FSM 0x0002
|
2012-12-02 21:52:52 +01:00
|
|
|
#define HEAP_INSERT_FROZEN 0x0004
|
2008-11-06 21:51:15 +01:00
|
|
|
|
|
|
|
/*
 * BulkInsertState is a pointer to struct BulkInsertStateData.  Only the
 * struct tag is named here; no definition of the struct appears in the
 * visible part of this header, so the handle is opaque at this point.
 */
typedef struct BulkInsertStateData *BulkInsertState;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
/*
|
|
|
|
* Possible lock modes for a tuple.
|
|
|
|
*/
|
|
|
|
typedef enum LockTupleMode
|
2007-06-08 20:23:53 +02:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
/* SELECT FOR KEY SHARE */
|
|
|
|
LockTupleKeyShare,
|
|
|
|
/* SELECT FOR SHARE */
|
|
|
|
LockTupleShare,
|
|
|
|
/* SELECT FOR NO KEY UPDATE, and UPDATEs that don't modify key columns */
|
|
|
|
LockTupleNoKeyExclusive,
|
|
|
|
/* SELECT FOR UPDATE, UPDATEs that modify key columns, and DELETE */
|
2007-06-08 20:23:53 +02:00
|
|
|
LockTupleExclusive
|
|
|
|
} LockTupleMode;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
/* Highest lock mode: LockTupleExclusive is the last enumerator of LockTupleMode. */
#define MaxLockTupleMode LockTupleExclusive
|
|
|
|
|
2012-10-26 21:55:36 +02:00
|
|
|
/*
|
|
|
|
* When heap_update, heap_delete, or heap_lock_tuple fail because the target
|
|
|
|
* tuple is already outdated, they fill in this struct to provide information
|
|
|
|
* to the caller about what happened.
|
|
|
|
* ctid is the target's ctid link: it is the same as the target's TID if the
|
|
|
|
* target was deleted, or the location of the replacement tuple if the target
|
|
|
|
* was updated.
|
|
|
|
* xmax is the outdating transaction's XID. If the caller wants to visit the
|
|
|
|
* replacement tuple, it must check that this matches before believing the
|
|
|
|
* replacement is really a match.
|
|
|
|
* cmax is the outdating command's CID, but only when the failure code is
|
|
|
|
* HeapTupleSelfUpdated (i.e., something in the current transaction outdated
|
2014-05-06 18:12:18 +02:00
|
|
|
* the tuple); otherwise cmax is zero. (We make this restriction because
|
2012-10-26 21:55:36 +02:00
|
|
|
* HeapTupleHeaderGetCmax doesn't work for tuples outdated in other
|
|
|
|
* transactions.)
|
|
|
|
*/
|
|
|
|
typedef struct HeapUpdateFailureData
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
ItemPointerData ctid;
|
|
|
|
TransactionId xmax;
|
|
|
|
CommandId cmax;
|
2012-10-26 21:55:36 +02:00
|
|
|
} HeapUpdateFailureData;
|
|
|
|
|
2007-06-08 20:23:53 +02:00
|
|
|
|
1996-08-27 23:50:29 +02:00
|
|
|
/* ----------------
|
1997-09-07 07:04:48 +02:00
|
|
|
* function prototypes for heap access method
|
2000-06-19 00:44:35 +02:00
|
|
|
*
|
|
|
|
* heap_create, heap_create_with_catalog, and heap_drop_with_catalog
|
|
|
|
* are declared in catalog/heap.h
|
1996-08-27 23:50:29 +02:00
|
|
|
* ----------------
|
|
|
|
*/
|
|
|
|
|
2007-06-08 20:23:53 +02:00
|
|
|
/* in heap/heapam.c */
|
2001-11-02 17:30:29 +01:00
|
|
|
extern Relation relation_open(Oid relationId, LOCKMODE lockmode);
|
2006-08-18 18:09:13 +02:00
|
|
|
extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode);
|
2002-03-26 20:17:02 +01:00
|
|
|
extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
|
2011-06-27 21:06:32 +02:00
|
|
|
extern Relation relation_openrv_extended(const RangeVar *relation,
|
|
|
|
LOCKMODE lockmode, bool missing_ok);
|
2001-11-02 17:30:29 +01:00
|
|
|
extern void relation_close(Relation relation, LOCKMODE lockmode);
|
|
|
|
|
1999-09-18 21:08:25 +02:00
|
|
|
extern Relation heap_open(Oid relationId, LOCKMODE lockmode);
|
2002-03-26 20:17:02 +01:00
|
|
|
extern Relation heap_openrv(const RangeVar *relation, LOCKMODE lockmode);
|
2011-06-27 21:06:32 +02:00
|
|
|
extern Relation heap_openrv_extended(const RangeVar *relation,
|
|
|
|
LOCKMODE lockmode, bool missing_ok);
|
2001-11-05 18:46:40 +01:00
|
|
|
|
2001-11-02 17:30:29 +01:00
|
|
|
/* heap_close is a plain alias: it expands to relation_close with the same two arguments. */
#define heap_close(rel, mode) relation_close(rel, mode)
|
|
|
|
|
2008-06-19 02:46:06 +02:00
|
|
|
/* struct definition appears in relscan.h */
|
|
|
|
typedef struct HeapScanDescData *HeapScanDesc;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* HeapScanIsValid
|
|
|
|
* True iff the heap scan is valid.
|
|
|
|
*/
|
|
|
|
#define HeapScanIsValid(scan) PointerIsValid(scan)
|
|
|
|
|
2002-05-21 01:51:44 +02:00
|
|
|
extern HeapScanDesc heap_beginscan(Relation relation, Snapshot snapshot,
|
2002-09-04 22:31:48 +02:00
|
|
|
int nkeys, ScanKey key);
|
Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.
SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row. In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result. This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.
The major issue that has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow. However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads. To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed. The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all. Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.
Patch by me. Review by Michael Paquier and Andres Freund.
2013-07-02 15:47:01 +02:00
|
|
|
extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys,
|
2014-05-06 18:12:18 +02:00
|
|
|
ScanKey key);
|
2008-01-14 02:39:09 +01:00
|
|
|
extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot,
|
|
|
|
int nkeys, ScanKey key,
|
|
|
|
bool allow_strat, bool allow_sync);
|
2007-06-09 20:49:55 +02:00
|
|
|
extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot,
|
2007-11-15 22:14:46 +01:00
|
|
|
int nkeys, ScanKey key);
|
2002-05-21 01:51:44 +02:00
|
|
|
extern void heap_rescan(HeapScanDesc scan, ScanKey key);
|
1998-08-19 04:04:17 +02:00
|
|
|
extern void heap_endscan(HeapScanDesc scan);
|
2002-05-21 01:51:44 +02:00
|
|
|
extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
|
|
|
|
|
2002-05-24 20:57:57 +02:00
|
|
|
extern bool heap_fetch(Relation relation, Snapshot snapshot,
|
2002-09-04 22:31:48 +02:00
|
|
|
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
|
2007-05-27 05:50:39 +02:00
|
|
|
Relation stats_relation);
|
Implement genuine serializable isolation level.
Until now, our Serializable mode has in fact been what's called Snapshot
Isolation, which allows some anomalies that could not occur in any
serialized ordering of the transactions. This patch fixes that using a
method called Serializable Snapshot Isolation, based on research papers by
Michael J. Cahill (see README-SSI for full references). In Serializable
Snapshot Isolation, transactions run like they do in Snapshot Isolation,
but a predicate lock manager observes the reads and writes performed and
aborts transactions if it detects that an anomaly might occur. This method
produces some false positives, ie. it sometimes aborts transactions even
though there is no anomaly.
To track reads we implement predicate locking, see storage/lmgr/predicate.c.
Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared
memory is finite, so when a transaction takes many tuple-level locks on a
page, the locks are promoted to a single page-level lock, and further to a
single relation level lock if necessary. To lock key values with no matching
tuple, a sequential scan always takes a relation-level lock, and an index
scan acquires a page-level lock that covers the search key, whether or not
there are any matching keys at the moment.
A predicate lock doesn't conflict with any regular locks or with other
predicate locks in the normal sense. They're only used by the predicate lock
manager to detect the danger of anomalies. Only serializable transactions
participate in predicate locking, so there should be no extra overhead for
other transactions.
Predicate locks can't be released at commit, but must be remembered until
all the transactions that overlapped with it have completed. That means that
we need to remember an unbounded number of predicate locks, so we apply a
lossy but conservative method of tracking locks for committed transactions.
If we run short of shared memory, we overflow to a new "pg_serial" SLRU
pool.
We don't currently allow Serializable transactions in Hot Standby mode.
That would be hard, because even read-only transactions can cause anomalies
that wouldn't otherwise occur.
Serializable isolation mode now means the new fully serializable level.
Repeatable Read gives you the old Snapshot Isolation level that we have
always had.
Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and
Anssi Kääriäinen
2011-02-07 22:46:51 +01:00
|
|
|
extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
|
2011-06-27 16:27:17 +02:00
|
|
|
Buffer buffer, Snapshot snapshot, HeapTuple heapTuple,
|
|
|
|
bool *all_dead, bool first_call);
|
2007-09-20 19:56:33 +02:00
|
|
|
extern bool heap_hot_search(ItemPointer tid, Relation relation,
|
2007-11-15 22:14:46 +01:00
|
|
|
Snapshot snapshot, bool *all_dead);
|
2002-05-21 01:51:44 +02:00
|
|
|
|
2005-08-20 02:40:32 +02:00
|
|
|
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
|
2002-09-04 22:31:48 +02:00
|
|
|
ItemPointer tid);
|
2001-09-17 02:29:10 +02:00
|
|
|
extern void setLastTid(const ItemPointer tid);
|
2002-05-22 00:05:55 +02:00
|
|
|
|
2008-11-06 21:51:15 +01:00
|
|
|
extern BulkInsertState GetBulkInsertState(void);
|
|
|
|
extern void FreeBulkInsertState(BulkInsertState);
|
|
|
|
|
2005-10-15 04:49:52 +02:00
|
|
|
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
|
2008-11-06 21:51:15 +01:00
|
|
|
int options, BulkInsertState bistate);
|
2011-11-09 09:54:41 +01:00
|
|
|
extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
|
|
|
|
CommandId cid, int options, BulkInsertState bistate);
|
2005-08-20 02:40:32 +02:00
|
|
|
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
|
2012-10-26 21:55:36 +02:00
|
|
|
CommandId cid, Snapshot crosscheck, bool wait,
|
|
|
|
HeapUpdateFailureData *hufd);
|
2005-08-20 02:40:32 +02:00
|
|
|
extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
|
2005-10-15 04:49:52 +02:00
|
|
|
HeapTuple newtup,
|
2012-10-26 21:55:36 +02:00
|
|
|
CommandId cid, Snapshot crosscheck, bool wait,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapUpdateFailureData *hufd, LockTupleMode *lockmode);
|
2005-08-20 02:40:32 +02:00
|
|
|
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
|
2012-10-26 21:55:36 +02:00
|
|
|
CommandId cid, LockTupleMode mode, bool nowait,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (there was previously a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
bool follow_update,
|
2012-10-26 21:55:36 +02:00
|
|
|
Buffer *buffer, HeapUpdateFailureData *hufd);
|
2006-05-11 01:18:39 +02:00
|
|
|
extern void heap_inplace_update(Relation relation, HeapTuple tuple);
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (previously there was a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists).
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, this fixes the old behavior in which a subtransaction that
grabbed a stronger tuple lock than the parent (sub)transaction held on a
given tuple, and later aborted, caused the weaker lock to be lost.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
|
|
|
TransactionId cutoff_multi);
|
2011-11-08 03:39:40 +01:00
|
|
|
extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This also causes additional WAL-logging (previously there was a single
WAL record for a locked tuple; now there is one for each updated copy
of the tuple that exists).
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, this fixes the old behavior in which a subtransaction that
grabbed a stronger tuple lock than the parent (sub)transaction held on a
given tuple, and later aborted, caused the weaker lock to be lost.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
MultiXactId cutoff_multi, Buffer buf);
|
2002-05-22 00:05:55 +02:00
|
|
|
|
|
|
|
extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
|
2001-01-23 05:32:23 +01:00
|
|
|
extern void simple_heap_delete(Relation relation, ItemPointer tid);
|
|
|
|
extern void simple_heap_update(Relation relation, ItemPointer otid,
|
2001-03-22 05:01:46 +01:00
|
|
|
HeapTuple tup);
|
2002-05-22 00:05:55 +02:00
|
|
|
|
1998-08-19 04:04:17 +02:00
|
|
|
extern void heap_markpos(HeapScanDesc scan);
|
|
|
|
extern void heap_restrpos(HeapScanDesc scan);
|
1996-08-27 23:50:29 +02:00
|
|
|
|
2007-09-20 19:56:33 +02:00
|
|
|
extern void heap_sync(Relation relation);
|
|
|
|
|
|
|
|
/* in heap/pruneheap.c */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Michael
Paquier, Simon Riggs, Craig Ringer, and Steve Singer.
2014-03-03 22:32:18 +01:00
|
|
|
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
|
2007-11-15 22:14:46 +01:00
|
|
|
extern int heap_page_prune(Relation relation, Buffer buffer,
|
|
|
|
TransactionId OldestXmin,
|
2010-04-21 19:20:56 +02:00
|
|
|
bool report_stats, TransactionId *latestRemovedXid);
|
2008-06-12 11:12:31 +02:00
|
|
|
extern void heap_page_prune_execute(Buffer buffer,
|
2008-03-08 22:57:59 +01:00
|
|
|
OffsetNumber *redirected, int nredirected,
|
|
|
|
OffsetNumber *nowdead, int ndead,
|
2010-02-08 05:33:55 +01:00
|
|
|
OffsetNumber *nowunused, int nunused);
|
2007-09-20 19:56:33 +02:00
|
|
|
extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
|
2007-01-25 03:17:26 +01:00
|
|
|
|
2007-06-08 20:23:53 +02:00
|
|
|
/* in heap/syncscan.c */
|
|
|
|
extern void ss_report_location(Relation rel, BlockNumber location);
|
|
|
|
extern BlockNumber ss_get_location(Relation rel, BlockNumber relnblocks);
|
|
|
|
extern void SyncScanShmemInit(void);
|
|
|
|
extern Size SyncScanShmemSize(void);
|
|
|
|
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* HEAPAM_H */
|