1996-07-09 08:22:35 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* tqual.c
|
2007-03-25 21:45:14 +02:00
|
|
|
* POSTGRES "time qualification" code, ie, tuple visibility rules.
|
2002-01-16 21:29:02 +01:00
|
|
|
*
|
2004-10-16 00:40:29 +02:00
|
|
|
* NOTE: all the HeapTupleSatisfies routines will update the tuple's
|
|
|
|
* "hint" status bits if we see that the inserting or deleting transaction
|
2007-08-14 19:35:18 +02:00
|
|
|
* has now committed or aborted (and it is safe to set the hint bits).
|
2013-03-22 14:54:07 +01:00
|
|
|
* If the hint bits are changed, MarkBufferDirtyHint is called on
|
2007-08-14 19:35:18 +02:00
|
|
|
* the passed-in buffer. The caller must hold not only a pin, but at least
|
|
|
|
* shared buffer content lock on the buffer containing the tuple.
|
2004-10-16 00:40:29 +02:00
|
|
|
*
|
2012-05-14 09:22:44 +02:00
|
|
|
* NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array)
|
2005-05-07 23:22:01 +02:00
|
|
|
* before TransactionIdDidCommit/TransactionIdDidAbort (which look in
|
|
|
|
* pg_clog). Otherwise we have a race condition: we might decide that a
|
|
|
|
* just-committed transaction crashed, because none of the tests succeed.
|
|
|
|
* xact.c is careful to record commit/abort in pg_clog before it unsets
|
2012-05-14 09:22:44 +02:00
|
|
|
* MyPgXact->xid in PGXACT array. That fixes that problem, but it also
|
2005-05-07 23:22:01 +02:00
|
|
|
* means there is a window where TransactionIdIsInProgress and
|
|
|
|
* TransactionIdDidCommit will both return true. If we check only
|
|
|
|
* TransactionIdDidCommit, we could consider a tuple committed when a
|
|
|
|
* later GetSnapshotData call will still think the originating transaction
|
2014-05-06 18:12:18 +02:00
|
|
|
* is in progress, which leads to application-level inconsistency. The
|
2005-05-07 23:22:01 +02:00
|
|
|
* upshot is that we gotta check TransactionIdIsInProgress first in all
|
|
|
|
* code paths, except for a few cases where we are looking at
|
|
|
|
* subtransactions of our own main transaction and so there can't be any
|
|
|
|
* race condition.
|
|
|
|
*
|
2009-03-09 14:08:05 +01:00
|
|
|
* Summary of visibility functions:
|
|
|
|
*
|
2009-06-11 16:49:15 +02:00
|
|
|
* HeapTupleSatisfiesMVCC()
|
|
|
|
* visible to supplied snapshot, excludes current command
|
|
|
|
* HeapTupleSatisfiesUpdate()
|
2013-08-01 16:46:19 +02:00
|
|
|
* visible to instant snapshot, with user-supplied command
|
2009-06-11 16:49:15 +02:00
|
|
|
* counter and more complex result
|
|
|
|
* HeapTupleSatisfiesSelf()
|
|
|
|
* visible to instant snapshot and current command
|
|
|
|
* HeapTupleSatisfiesDirty()
|
|
|
|
* like HeapTupleSatisfiesSelf(), but includes open transactions
|
|
|
|
* HeapTupleSatisfiesVacuum()
|
|
|
|
* visible to any running transaction, used by VACUUM
|
|
|
|
* HeapTupleSatisfiesToast()
|
|
|
|
* visible unless part of interrupted vacuum, used for TOAST
|
|
|
|
* HeapTupleSatisfiesAny()
|
|
|
|
* all tuples are visible
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
2014-01-07 22:05:30 +01:00
|
|
|
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/backend/utils/time/tqual.c
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
2012-08-30 22:15:44 +02:00
|
|
|
#include "access/htup_details.h"
|
2005-04-28 23:47:18 +02:00
|
|
|
#include "access/multixact.h"
|
2004-07-01 02:52:04 +02:00
|
|
|
#include "access/subtrans.h"
|
2006-07-13 18:49:20 +02:00
|
|
|
#include "access/transam.h"
|
|
|
|
#include "access/xact.h"
|
2014-11-06 12:52:08 +01:00
|
|
|
#include "access/xlog.h"
|
2006-07-13 19:47:02 +02:00
|
|
|
#include "storage/bufmgr.h"
|
2005-05-19 23:35:48 +02:00
|
|
|
#include "storage/procarray.h"
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "utils/combocid.h"
|
|
|
|
#include "utils/snapmgr.h"
|
1996-07-09 08:22:35 +02:00
|
|
|
#include "utils/tqual.h"
|
|
|
|
|
2007-03-25 21:45:14 +02:00
|
|
|
|
|
|
|
/* Static variables representing various special snapshot semantics */
|
|
|
|
SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf};
|
|
|
|
SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny};
|
|
|
|
SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast};
|
|
|
|
|
2006-09-03 17:59:39 +02:00
|
|
|
/* local functions */
|
2007-03-25 21:45:14 +02:00
|
|
|
static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
|
2006-09-03 17:59:39 +02:00
|
|
|
|
2007-08-02 00:45:09 +02:00
|
|
|
/*
|
2007-08-14 19:35:18 +02:00
|
|
|
* SetHintBits()
|
2007-08-02 00:45:09 +02:00
|
|
|
*
|
|
|
|
* Set commit/abort hint bits on a tuple, if appropriate at this time.
|
|
|
|
*
|
2007-08-14 19:35:18 +02:00
|
|
|
* It is only safe to set a transaction-committed hint bit if we know the
|
2011-10-28 23:08:09 +02:00
|
|
|
* transaction's commit record has been flushed to disk, or if the table is
|
|
|
|
* temporary or unlogged and will be obliterated by a crash anyway. We
|
|
|
|
* cannot change the LSN of the page here because we may hold only a share
|
|
|
|
* lock on the buffer, so we can't use the LSN to interlock this; we have to
|
|
|
|
* just refrain from setting the hint bit until some future re-examination
|
|
|
|
* of the tuple.
|
2007-08-02 00:45:09 +02:00
|
|
|
*
|
2014-05-06 18:12:18 +02:00
|
|
|
* We can always set hint bits when marking a transaction aborted. (Some
|
2007-08-14 19:35:18 +02:00
|
|
|
* code in heapam.c relies on that!)
|
|
|
|
*
|
|
|
|
* Also, if we are cleaning up HEAP_MOVED_IN or HEAP_MOVED_OFF entries, then
|
2010-02-08 15:10:21 +01:00
|
|
|
* we can always set the hint bits, since pre-9.0 VACUUM FULL always used
|
2010-02-08 05:33:55 +01:00
|
|
|
* synchronous commits and didn't move tuples that weren't previously
|
2014-05-06 18:12:18 +02:00
|
|
|
* hinted. (This is not known by this subroutine, but is applied by its
|
2010-02-08 05:33:55 +01:00
|
|
|
* callers.) Note: old-style VACUUM FULL is gone, but we have to keep this
|
|
|
|
* module's support for MOVED_OFF/MOVED_IN flag bits for as long as we
|
|
|
|
* support in-place update from pre-9.0 databases.
|
2007-08-02 00:45:09 +02:00
|
|
|
*
|
|
|
|
* Normal commits may be asynchronous, so for those we need to get the LSN
|
|
|
|
* of the transaction and then check whether this is flushed.
|
|
|
|
*
|
|
|
|
* The caller should pass xid as the XID of the transaction to check, or
|
|
|
|
* InvalidTransactionId if no check is needed.
|
|
|
|
*/
|
|
|
|
static inline void
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(HeapTupleHeader tuple, Buffer buffer,
|
|
|
|
uint16 infomask, TransactionId xid)
|
2007-08-02 00:45:09 +02:00
|
|
|
{
|
|
|
|
if (TransactionIdIsValid(xid))
|
|
|
|
{
|
|
|
|
/* NB: xid must be known committed here! */
|
2007-11-15 22:14:46 +01:00
|
|
|
XLogRecPtr commitLSN = TransactionIdGetCommitLSN(xid);
|
2007-08-02 00:45:09 +02:00
|
|
|
|
2011-10-28 23:08:09 +02:00
|
|
|
if (XLogNeedsFlush(commitLSN) && BufferIsPermanent(buffer))
|
2007-08-02 00:45:09 +02:00
|
|
|
return; /* not flushed yet, so don't set hint */
|
|
|
|
}
|
|
|
|
|
|
|
|
tuple->t_infomask |= infomask;
|
2013-06-17 17:02:12 +02:00
|
|
|
MarkBufferDirtyHint(buffer, true);
|
2007-08-02 00:45:09 +02:00
|
|
|
}
|
|
|
|
|
2007-08-14 19:35:18 +02:00
|
|
|
/*
|
|
|
|
* HeapTupleSetHintBits --- exported version of SetHintBits()
|
|
|
|
*
|
|
|
|
* This must be separate because of C99's brain-dead notions about how to
|
|
|
|
* implement inline functions.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer,
|
|
|
|
uint16 infomask, TransactionId xid)
|
|
|
|
{
|
|
|
|
SetHintBits(tuple, buffer, infomask, xid);
|
|
|
|
}
|
|
|
|
|
2007-08-02 00:45:09 +02:00
|
|
|
|
1996-07-09 08:22:35 +02:00
|
|
|
/*
|
2007-03-25 21:45:14 +02:00
|
|
|
* HeapTupleSatisfiesSelf
|
2002-01-17 00:51:56 +01:00
|
|
|
* True iff heap tuple is valid "for itself".
|
2002-01-17 00:09:09 +01:00
|
|
|
*
|
2002-01-17 00:51:56 +01:00
|
|
|
* Here, we consider the effects of:
|
|
|
|
* all committed transactions (as of the current instant)
|
2002-01-17 00:09:09 +01:00
|
|
|
* previous commands of this transaction
|
|
|
|
* changes made by the current command
|
1996-07-09 08:22:35 +02:00
|
|
|
*
|
|
|
|
* Note:
|
1997-09-07 07:04:48 +02:00
|
|
|
* Assumes heap tuple is valid.
|
2002-01-16 21:29:02 +01:00
|
|
|
*
|
1996-07-09 08:22:35 +02:00
|
|
|
* The satisfaction of "itself" requires the following:
|
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* ((Xmin == my-transaction && the row was updated by the current transaction, and
|
|
|
|
* (Xmax is null it was not deleted
|
|
|
|
* [|| Xmax != my-transaction)]) [or it was deleted by another transaction]
|
1996-07-09 08:22:35 +02:00
|
|
|
* ||
|
|
|
|
*
|
1997-09-07 07:04:48 +02:00
|
|
|
* (Xmin is committed && the row was modified by a committed transaction, and
|
|
|
|
* (Xmax is null || the row has not been deleted, or
|
|
|
|
* (Xmax != my-transaction && the row was deleted by another transaction
|
|
|
|
* Xmax is not committed))) that has not been committed
|
1996-07-09 08:22:35 +02:00
|
|
|
*/
|
1998-04-24 16:43:33 +02:00
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer)
|
1996-07-09 08:22:35 +02:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
1998-09-01 05:29:17 +02:00
|
|
|
return false;
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
if (tuple->t_infomask & HEAP_MOVED_OFF)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
1997-11-02 16:27:14 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
|
1998-09-01 05:29:17 +02:00
|
|
|
return true;
|
2001-12-19 18:18:39 +01:00
|
|
|
|
2013-05-29 22:58:43 +02:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* updating subtransaction must have aborted */
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
2004-07-28 16:23:31 +02:00
|
|
|
{
|
2007-09-21 20:24:28 +02:00
|
|
|
/* deleting subtransaction must have aborted */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2004-07-01 02:52:04 +02:00
|
|
|
return true;
|
2004-07-28 16:23:31 +02:00
|
|
|
}
|
2004-07-01 02:52:04 +02:00
|
|
|
|
1998-12-15 13:47:01 +01:00
|
|
|
return false;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-09-01 05:29:17 +02:00
|
|
|
return false;
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
2013-12-22 21:49:09 +01:00
|
|
|
HeapTupleHeaderGetRawXmin(tuple));
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
2004-10-16 00:40:29 +02:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return false;
|
2004-10-16 00:40:29 +02:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
2001-12-19 18:18:39 +01:00
|
|
|
|
|
|
|
/* by here, the inserting transaction has committed */
|
From: Dan McGuirk <mcguirk@indirect.com>
Reply-To: hackers@hub.org, Dan McGuirk <mcguirk@indirect.com>
To: hackers@hub.org
Subject: [HACKERS] tmin writeback optimization
I was doing some profiling of the backend, and noticed that during a certain
benchmark I was running somewhere between 30% and 75% of the backend's CPU
time was being spent in calls to TransactionIdDidCommit() from
HeapTupleSatisfiesNow() or HeapTupleSatisfiesItself() to determine that
changed rows' transactions had in fact been committed even though the rows'
tmin values had not yet been set.
When a query looks at a given row, it needs to figure out whether the
transaction that changed the row has been committed and hence it should pay
attention to the row, or whether on the other hand the transaction is still
in progress or has been aborted and hence the row should be ignored. If
a tmin value is set, it is known definitively that the row's transaction
has been committed. However, if tmin is not set, the transaction
referred to in xmin must be looked up in pg_log, and this is what the
backend was spending a lot of time doing during my benchmark.
So, implementing a method suggested by Vadim, I created the following
patch that, the first time a query finds a committed row whose tmin value
is not set, sets it, and marks the buffer where the row is stored as
dirty. (It works for tmax, too.) This doesn't result in the boost in
real time performance I was hoping for, however it does decrease backend
CPU usage by up to two-thirds in certain situations, so it could be
rather beneficial in high-concurrency settings.
1997-03-28 08:06:53 +01:00
|
|
|
|
1997-11-02 16:27:14 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
|
1998-09-01 05:29:17 +02:00
|
|
|
return true;
|
1997-09-07 07:04:48 +02:00
|
|
|
|
1997-11-02 16:27:14 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
1998-12-15 13:47:01 +01:00
|
|
|
return true;
|
1999-05-25 18:15:34 +02:00
|
|
|
return false; /* updated by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
1997-11-02 16:27:14 +01:00
|
|
|
|
2005-04-28 23:47:18 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return false;
|
|
|
|
if (TransactionIdIsInProgress(xmax))
|
|
|
|
return true;
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
return false;
|
2013-11-29 20:08:06 +01:00
|
|
|
/* it must have aborted or crashed */
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
1998-12-15 13:47:01 +01:00
|
|
|
return true;
|
1998-09-01 05:29:17 +02:00
|
|
|
return false;
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
2005-05-07 23:22:01 +02:00
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
1998-09-01 05:29:17 +02:00
|
|
|
return true;
|
1997-09-07 07:04:48 +02:00
|
|
|
}
|
|
|
|
|
2001-12-19 18:18:39 +01:00
|
|
|
/* xmax transaction committed */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2003-06-25 03:08:13 +02:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
1998-12-15 13:47:01 +01:00
|
|
|
return true;
|
2003-06-25 03:08:13 +02:00
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapTupleHeaderGetRawXmax(tuple));
|
1998-09-01 05:29:17 +02:00
|
|
|
return false;
|
1996-07-09 08:22:35 +02:00
|
|
|
}
|
|
|
|
|
2007-03-25 21:45:14 +02:00
|
|
|
/*
|
|
|
|
* HeapTupleSatisfiesAny
|
|
|
|
* Dummy "satisfies" routine: any tuple satisfies SnapshotAny.
|
|
|
|
*/
|
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesAny(HeapTuple htup, Snapshot snapshot, Buffer buffer)
|
2007-03-25 21:45:14 +02:00
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2002-01-16 21:29:02 +01:00
|
|
|
/*
|
|
|
|
* HeapTupleSatisfiesToast
|
2002-01-17 00:51:56 +01:00
|
|
|
* True iff heap tuple is valid as a TOAST row.
|
2002-01-16 21:29:02 +01:00
|
|
|
*
|
|
|
|
* This is a simplified version that only checks for VACUUM moving conditions.
|
|
|
|
* It's appropriate for TOAST usage because TOAST really doesn't want to do
|
2002-01-17 00:09:09 +01:00
|
|
|
* its own time qual checks; if you can see the main table row that contains
|
2014-05-06 18:12:18 +02:00
|
|
|
* a TOAST reference, you should be able to see the TOASTed value. However,
|
2002-01-16 21:29:02 +01:00
|
|
|
* vacuuming a TOAST table is independent of the main table, and in case such
|
|
|
|
* a vacuum fails partway through, we'd better do this much checking.
|
|
|
|
*
|
|
|
|
* Among other things, this means you can't do UPDATEs of rows in a TOAST
|
|
|
|
* table.
|
|
|
|
*/
|
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
|
2007-03-25 21:45:14 +02:00
|
|
|
Buffer buffer)
|
2002-01-16 21:29:02 +01:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
2002-01-16 21:29:02 +01:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
2002-01-16 21:29:02 +01:00
|
|
|
return false;
|
|
|
|
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2002-01-16 21:29:02 +01:00
|
|
|
if (tuple->t_infomask & HEAP_MOVED_OFF)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
2002-01-16 21:29:02 +01:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsInProgress(xvac))
|
2002-01-16 21:29:02 +01:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2002-01-16 21:29:02 +01:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2002-01-16 21:29:02 +01:00
|
|
|
return false;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2002-01-16 21:29:02 +01:00
|
|
|
}
|
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2002-01-16 21:29:02 +01:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(xvac))
|
2002-01-16 21:29:02 +01:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2002-01-16 21:29:02 +01:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2002-01-16 21:29:02 +01:00
|
|
|
else
|
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2002-01-16 21:29:02 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* otherwise assume the tuple is valid for TOAST. */
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* HeapTupleSatisfiesUpdate
|
|
|
|
*
|
2013-08-01 16:46:19 +02:00
|
|
|
* This function returns a more detailed result code than most of the
|
|
|
|
* functions in this file, since UPDATE needs to know more than "is it
|
|
|
|
* visible?". It also allows for user-supplied CommandId rather than
|
|
|
|
* relying on CurrentCommandId.
|
2005-04-28 23:47:18 +02:00
|
|
|
*
|
|
|
|
* The possible return codes are:
|
|
|
|
*
|
|
|
|
* HeapTupleInvisible: the tuple didn't exist at all when the scan started,
|
|
|
|
* e.g. it was created by a later CommandId.
|
|
|
|
*
|
|
|
|
* HeapTupleMayBeUpdated: The tuple is valid and visible, so it may be
|
|
|
|
* updated.
|
|
|
|
*
|
|
|
|
* HeapTupleSelfUpdated: The tuple was updated by the current transaction,
|
|
|
|
* after the current scan started.
|
|
|
|
*
|
|
|
|
* HeapTupleUpdated: The tuple was updated by a committed transaction.
|
|
|
|
*
|
|
|
|
* HeapTupleBeingUpdated: The tuple is being updated by an in-progress
|
2006-11-17 19:00:15 +01:00
|
|
|
* transaction other than the current transaction. (Note: this includes
|
|
|
|
* the case where the tuple is share-locked by a MultiXact, even if the
|
|
|
|
* MultiXact includes the current transaction. Callers that want to
|
|
|
|
* distinguish that case must test for it themselves.)
|
2002-01-16 21:29:02 +01:00
|
|
|
*/
|
2005-03-21 00:40:34 +01:00
|
|
|
HTSU_Result
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
|
2004-10-16 00:40:29 +02:00
|
|
|
Buffer buffer)
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleInvisible;
|
|
|
|
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2001-12-19 18:18:39 +01:00
|
|
|
if (tuple->t_infomask & HEAP_MOVED_OFF)
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
return HeapTupleInvisible;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return HeapTupleInvisible;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2001-12-19 18:18:39 +01:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return HeapTupleInvisible;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return HeapTupleInvisible;
|
|
|
|
}
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmin(tuple) >= curcid)
|
2005-10-15 04:49:52 +02:00
|
|
|
return HeapTupleInvisible; /* inserted after scan started */
|
1998-12-15 13:47:01 +01:00
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
|
2013-12-18 17:31:27 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
{
|
2014-05-06 18:12:18 +02:00
|
|
|
TransactionId xmax;
|
2013-12-18 17:31:27 +01:00
|
|
|
|
|
|
|
xmax = HeapTupleHeaderGetRawXmax(tuple);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Careful here: even though this tuple was created by our own
|
|
|
|
* transaction, it might be locked by other transactions, if
|
|
|
|
* the original version was key-share locked when we updated
|
|
|
|
* it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
|
|
|
if (MultiXactHasRunningRemoteMembers(xmax))
|
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
else
|
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* if locker is gone, all's well */
|
|
|
|
if (!TransactionIdIsInProgress(xmax))
|
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
else
|
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
}
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* updating subtransaction must have aborted */
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(xmax))
|
2013-12-18 17:31:27 +01:00
|
|
|
{
|
|
|
|
if (MultiXactHasRunningRemoteMembers(HeapTupleHeaderGetRawXmax(tuple)))
|
|
|
|
return HeapTupleBeingUpdated;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
2013-12-18 17:31:27 +01:00
|
|
|
}
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
else
|
|
|
|
{
|
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= curcid)
|
2013-05-29 22:58:43 +02:00
|
|
|
return HeapTupleSelfUpdated; /* updated after scan
|
|
|
|
* started */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
else
|
2013-05-29 22:58:43 +02:00
|
|
|
return HeapTupleInvisible; /* updated before scan
|
|
|
|
* started */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
}
|
|
|
|
}
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
2004-07-28 16:23:31 +02:00
|
|
|
{
|
2007-09-21 20:24:28 +02:00
|
|
|
/* deleting subtransaction must have aborted */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2004-07-01 02:52:04 +02:00
|
|
|
return HeapTupleMayBeUpdated;
|
2004-07-28 16:23:31 +02:00
|
|
|
}
|
2004-07-01 02:52:04 +02:00
|
|
|
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= curcid)
|
2005-10-15 04:49:52 +02:00
|
|
|
return HeapTupleSelfUpdated; /* updated after scan started */
|
1998-12-15 13:47:01 +01:00
|
|
|
else
|
2005-10-15 04:49:52 +02:00
|
|
|
return HeapTupleInvisible; /* updated before scan started */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleInvisible;
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
2013-12-22 21:49:09 +01:00
|
|
|
HeapTupleHeaderGetRawXmin(tuple));
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
2004-10-16 00:40:29 +02:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return HeapTupleInvisible;
|
2004-10-16 00:40:29 +02:00
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* by here, the inserting transaction has committed */
|
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
|
2001-12-19 18:18:39 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
2001-10-28 07:26:15 +01:00
|
|
|
return HeapTupleUpdated; /* updated by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
2005-04-28 23:47:18 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
{
|
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* If it's only locked but neither EXCL_LOCK nor KEYSHR_LOCK is
|
2014-05-06 18:12:18 +02:00
|
|
|
* set, it cannot possibly be running. Otherwise need to check.
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
*/
|
|
|
|
if ((tuple->t_infomask & (HEAP_XMAX_EXCL_LOCK |
|
|
|
|
HEAP_XMAX_KEYSHR_LOCK)) &&
|
2014-07-29 21:41:06 +02:00
|
|
|
MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true))
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
|
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
|
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
}
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2014-07-29 21:41:06 +02:00
|
|
|
if (!TransactionIdIsValid(xmax))
|
|
|
|
{
|
|
|
|
if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
|
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
}
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
2013-11-29 20:08:06 +01:00
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
if (TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
{
|
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= curcid)
|
2013-05-29 22:58:43 +02:00
|
|
|
return HeapTupleSelfUpdated; /* updated after scan started */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
else
|
2013-05-29 22:58:43 +02:00
|
|
|
return HeapTupleInvisible; /* updated before scan started */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
}
|
|
|
|
|
2014-07-29 21:41:06 +02:00
|
|
|
if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
|
2005-04-28 23:47:18 +02:00
|
|
|
return HeapTupleBeingUpdated;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
return HeapTupleUpdated;
|
2013-11-29 20:08:06 +01:00
|
|
|
|
2013-12-05 16:21:55 +01:00
|
|
|
/*
|
|
|
|
* By here, the update in the Xmax is either aborted or crashed, but
|
|
|
|
* what about the other members?
|
|
|
|
*/
|
|
|
|
|
2014-07-29 21:41:06 +02:00
|
|
|
if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
|
2013-12-05 16:21:55 +01:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* There's no member, even just a locker, alive anymore, so we can
|
|
|
|
* mark the Xmax as invalid.
|
|
|
|
*/
|
2013-11-29 20:08:06 +01:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2013-12-05 16:21:55 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* There are lockers running */
|
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
}
|
2005-04-28 23:47:18 +02:00
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= curcid)
|
2005-10-15 04:49:52 +02:00
|
|
|
return HeapTupleSelfUpdated; /* updated after scan started */
|
1998-12-15 13:47:01 +01:00
|
|
|
else
|
|
|
|
return HeapTupleInvisible; /* updated before scan started */
|
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
2005-05-07 23:22:01 +02:00
|
|
|
return HeapTupleBeingUpdated;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return HeapTupleMayBeUpdated;
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* xmax transaction committed */
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2003-06-25 03:08:13 +02:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
1998-12-15 13:47:01 +01:00
|
|
|
return HeapTupleMayBeUpdated;
|
2003-06-25 03:08:13 +02:00
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapTupleHeaderGetRawXmax(tuple));
|
1999-05-25 18:15:34 +02:00
|
|
|
return HeapTupleUpdated; /* updated by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
2002-01-17 00:51:56 +01:00
|
|
|
/*
|
|
|
|
* HeapTupleSatisfiesDirty
|
|
|
|
* True iff heap tuple is valid including effects of open transactions.
|
2002-01-17 00:09:09 +01:00
|
|
|
*
|
2002-01-17 00:51:56 +01:00
|
|
|
* Here, we consider the effects of:
|
|
|
|
* all committed and in-progress transactions (as of the current instant)
|
2002-01-17 00:09:09 +01:00
|
|
|
* previous commands of this transaction
|
2002-01-17 00:51:56 +01:00
|
|
|
* changes made by the current command
|
2002-01-16 21:29:02 +01:00
|
|
|
*
|
2007-03-25 21:45:14 +02:00
|
|
|
* This is essentially like HeapTupleSatisfiesSelf as far as effects of
|
2002-01-16 21:29:02 +01:00
|
|
|
* the current transaction and committed/aborted xacts are concerned.
|
|
|
|
* However, we also include the effects of other xacts still in progress.
|
|
|
|
*
|
2007-03-25 21:45:14 +02:00
|
|
|
* A special hack is that the passed-in snapshot struct is used as an
|
|
|
|
* output argument to return the xids of concurrent xacts that affected the
|
|
|
|
* tuple. snapshot->xmin is set to the tuple's xmin if that is another
|
|
|
|
* transaction that's still in progress; or to InvalidTransactionId if the
|
|
|
|
* tuple's xmin is committed good, committed dead, or my own xact. Similarly
|
|
|
|
* for snapshot->xmax and the tuple's xmax.
|
2002-01-16 21:29:02 +01:00
|
|
|
*/
|
1998-12-15 13:47:01 +01:00
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
|
2007-03-25 21:45:14 +02:00
|
|
|
Buffer buffer)
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2007-03-25 21:45:14 +02:00
|
|
|
snapshot->xmin = snapshot->xmax = InvalidTransactionId;
|
1998-12-15 13:47:01 +01:00
|
|
|
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
1998-12-15 13:47:01 +01:00
|
|
|
return false;
|
|
|
|
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
if (tuple->t_infomask & HEAP_MOVED_OFF)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
1999-04-05 12:55:49 +02:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsInProgress(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
1999-04-12 18:57:27 +02:00
|
|
|
else
|
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
1999-04-12 18:57:27 +02:00
|
|
|
return false;
|
|
|
|
}
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
|
|
|
|
return true;
|
|
|
|
|
2013-05-29 22:58:43 +02:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* updating subtransaction must have aborted */
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return true;
|
|
|
|
else
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
2004-07-28 16:23:31 +02:00
|
|
|
{
|
2007-09-21 20:24:28 +02:00
|
|
|
/* deleting subtransaction must have aborted */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2004-07-01 02:52:04 +02:00
|
|
|
return true;
|
2004-07-28 16:23:31 +02:00
|
|
|
}
|
2004-07-01 02:52:04 +02:00
|
|
|
|
1998-12-15 13:47:01 +01:00
|
|
|
return false;
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
|
2001-12-19 18:18:39 +01:00
|
|
|
/* XXX shouldn't we fall through to look at xmax? */
|
1999-05-25 18:15:34 +02:00
|
|
|
return true; /* in insertion by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
2013-12-22 21:49:09 +01:00
|
|
|
HeapTupleHeaderGetRawXmin(tuple));
|
2005-05-07 23:22:01 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return false;
|
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* by here, the inserting transaction has committed */
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
1998-12-15 13:47:01 +01:00
|
|
|
return true;
|
1999-05-25 18:15:34 +02:00
|
|
|
return false; /* updated by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
2005-04-28 23:47:18 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return false;
|
|
|
|
if (TransactionIdIsInProgress(xmax))
|
|
|
|
{
|
|
|
|
snapshot->xmax = xmax;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
return false;
|
2013-11-29 20:08:06 +01:00
|
|
|
/* it must have aborted or crashed */
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
2000-07-03 06:45:09 +02:00
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2000-07-03 06:45:09 +02:00
|
|
|
return true;
|
1998-12-15 13:47:01 +01:00
|
|
|
return false;
|
2000-07-03 06:45:09 +02:00
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-15 13:47:01 +01:00
|
|
|
{
|
2013-08-02 20:34:56 +02:00
|
|
|
if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple);
|
2005-05-07 23:22:01 +02:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
|
2005-05-07 23:22:01 +02:00
|
|
|
{
|
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return true;
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* xmax transaction committed */
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2003-06-25 03:08:13 +02:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
1998-12-15 13:47:01 +01:00
|
|
|
return true;
|
2003-06-25 03:08:13 +02:00
|
|
|
}
|
1998-12-15 13:47:01 +01:00
|
|
|
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapTupleHeaderGetRawXmax(tuple));
|
1999-05-25 18:15:34 +02:00
|
|
|
return false; /* updated by other */
|
1998-12-15 13:47:01 +01:00
|
|
|
}
|
1998-12-16 12:53:55 +01:00
|
|
|
|
2002-01-16 21:29:02 +01:00
|
|
|
/*
|
2007-03-25 21:45:14 +02:00
|
|
|
* HeapTupleSatisfiesMVCC
|
|
|
|
* True iff heap tuple is valid for the given MVCC snapshot.
|
2002-01-17 00:09:09 +01:00
|
|
|
*
|
2002-01-17 00:51:56 +01:00
|
|
|
* Here, we consider the effects of:
|
|
|
|
* all transactions committed as of the time of the given snapshot
|
2002-01-17 00:09:09 +01:00
|
|
|
* previous commands of this transaction
|
|
|
|
*
|
|
|
|
* Does _not_ include:
|
2002-01-17 00:51:56 +01:00
|
|
|
* transactions shown as in-progress by the snapshot
|
|
|
|
* transactions started after the snapshot was taken
|
2002-01-17 00:09:09 +01:00
|
|
|
* changes made by the current command
|
2002-01-16 21:29:02 +01:00
|
|
|
*
|
|
|
|
* (Notice, however, that the tuple status hint bits will be updated on the
|
|
|
|
* basis of the true state of the transaction, even if we then pretend we
|
|
|
|
* can't see it.)
|
|
|
|
*/
|
1998-12-16 12:53:55 +01:00
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
|
2007-03-25 21:45:14 +02:00
|
|
|
Buffer buffer)
|
1998-12-16 12:53:55 +01:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
1998-12-16 12:53:55 +01:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
1998-12-16 12:53:55 +01:00
|
|
|
return false;
|
|
|
|
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
if (tuple->t_infomask & HEAP_MOVED_OFF)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
1999-03-28 22:32:42 +02:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(xvac))
|
1999-03-28 22:32:42 +02:00
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
return false;
|
|
|
|
}
|
1999-03-28 22:32:42 +02:00
|
|
|
}
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-16 12:53:55 +01:00
|
|
|
{
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
|
1998-12-16 12:53:55 +01:00
|
|
|
return false; /* inserted after scan started */
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
|
|
|
|
return true;
|
|
|
|
|
2013-05-29 22:58:43 +02:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) /* not deleter */
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* updating subtransaction must have aborted */
|
|
|
|
if (!TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return true;
|
|
|
|
else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
|
|
|
|
return true; /* updated after scan started */
|
|
|
|
else
|
2013-05-29 22:58:43 +02:00
|
|
|
return false; /* updated before scan started */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
}
|
2005-04-28 23:47:18 +02:00
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
2004-07-28 16:23:31 +02:00
|
|
|
{
|
2007-09-21 20:24:28 +02:00
|
|
|
/* deleting subtransaction must have aborted */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2004-07-01 02:52:04 +02:00
|
|
|
return true;
|
2004-07-28 16:23:31 +02:00
|
|
|
}
|
2004-07-01 02:52:04 +02:00
|
|
|
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
|
1998-12-16 12:53:55 +01:00
|
|
|
return true; /* deleted after scan started */
|
|
|
|
else
|
|
|
|
return false; /* deleted before scan started */
|
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
|
1998-12-16 12:53:55 +01:00
|
|
|
return false;
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
2013-12-22 21:49:09 +01:00
|
|
|
HeapTupleHeaderGetRawXmin(tuple));
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
2004-10-16 00:40:29 +02:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-05-07 23:22:01 +02:00
|
|
|
return false;
|
2004-10-16 00:40:29 +02:00
|
|
|
}
|
1998-12-16 12:53:55 +01:00
|
|
|
}
|
|
|
|
|
1999-05-25 18:15:34 +02:00
|
|
|
/*
|
|
|
|
* By here, the inserting transaction has committed - have to check
|
|
|
|
* when...
|
1998-12-16 12:53:55 +01:00
|
|
|
*/
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminFrozen(tuple)
|
|
|
|
&& XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
|
2006-09-03 17:59:39 +02:00
|
|
|
return false; /* treat as still in progress */
|
1998-12-16 12:53:55 +01:00
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid or aborted */
|
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2005-04-28 23:47:18 +02:00
|
|
|
return true;
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* already checked above */
|
|
|
|
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
{
|
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
|
|
|
|
return true; /* deleted after scan started */
|
|
|
|
else
|
|
|
|
return false; /* deleted before scan started */
|
|
|
|
}
|
|
|
|
if (TransactionIdIsInProgress(xmax))
|
|
|
|
return true;
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
{
|
|
|
|
/* updating transaction committed, but when? */
|
|
|
|
if (XidInMVCCSnapshot(xmax, snapshot))
|
|
|
|
return true; /* treat as still in progress */
|
|
|
|
return false;
|
|
|
|
}
|
2013-11-29 20:08:06 +01:00
|
|
|
/* it must have aborted or crashed */
|
1998-12-16 12:53:55 +01:00
|
|
|
return true;
|
2005-04-28 23:47:18 +02:00
|
|
|
}
|
1998-12-16 12:53:55 +01:00
|
|
|
|
|
|
|
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-16 12:53:55 +01:00
|
|
|
{
|
2002-06-15 21:54:24 +02:00
|
|
|
if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
|
1999-05-25 18:15:34 +02:00
|
|
|
return true; /* deleted after scan started */
|
1998-12-16 12:53:55 +01:00
|
|
|
else
|
1999-05-25 18:15:34 +02:00
|
|
|
return false; /* deleted before scan started */
|
1998-12-16 12:53:55 +01:00
|
|
|
}
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
2005-05-07 23:22:01 +02:00
|
|
|
return true;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
|
1998-12-16 12:53:55 +01:00
|
|
|
{
|
2005-05-07 23:22:01 +02:00
|
|
|
/* it must have aborted or crashed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
1998-12-16 12:53:55 +01:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* xmax transaction committed */
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapTupleHeaderGetRawXmax(tuple));
|
1998-12-16 12:53:55 +01:00
|
|
|
}
|
|
|
|
|
2002-05-06 04:39:01 +02:00
|
|
|
/*
|
|
|
|
* OK, the deleting transaction committed too ... but when?
|
|
|
|
*/
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
|
2006-09-03 17:59:39 +02:00
|
|
|
return true; /* treat as still in progress */
|
1999-05-25 18:15:34 +02:00
|
|
|
|
1998-12-16 12:53:55 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2001-07-12 06:11:13 +02:00
|
|
|
|
|
|
|
/*
|
2002-01-17 00:09:09 +01:00
|
|
|
* HeapTupleSatisfiesVacuum
|
|
|
|
*
|
2002-01-17 00:51:56 +01:00
|
|
|
* Determine the status of tuples for VACUUM purposes. Here, what
|
|
|
|
* we mainly want to know is if a tuple is potentially visible to *any*
|
|
|
|
* running transaction. If so, it can't be removed yet by VACUUM.
|
2001-07-12 06:11:13 +02:00
|
|
|
*
|
2014-05-06 18:12:18 +02:00
|
|
|
* OldestXmin is a cutoff XID (obtained from GetOldestXmin()). Tuples
|
2001-08-26 18:56:03 +02:00
|
|
|
* deleted by XIDs >= OldestXmin are deemed "recently dead"; they might
|
2001-07-12 06:11:13 +02:00
|
|
|
* still be visible to some open transaction, so we can't remove them,
|
|
|
|
* even if we see that the deleting transaction has committed.
|
|
|
|
*/
|
|
|
|
HTSV_Result
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
|
2004-10-16 00:40:29 +02:00
|
|
|
Buffer buffer)
|
2001-07-12 06:11:13 +02:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2001-07-12 06:11:13 +02:00
|
|
|
/*
|
|
|
|
* Has inserting transaction committed?
|
|
|
|
*
|
2005-11-22 19:17:34 +01:00
|
|
|
* If the inserting transaction aborted, then the tuple was never visible
|
|
|
|
* to any other transaction, so we can delete it immediately.
|
2001-07-12 06:11:13 +02:00
|
|
|
*/
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
2001-07-12 06:11:13 +02:00
|
|
|
{
|
2013-12-22 21:49:09 +01:00
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_DEAD;
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2001-07-12 06:11:13 +02:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_OFF)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2001-07-12 06:11:13 +02:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_DEAD;
|
|
|
|
}
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2001-07-12 06:11:13 +02:00
|
|
|
}
|
2010-02-08 15:10:21 +01:00
|
|
|
/* Used by pre-9.0 binary upgrades */
|
2001-07-12 06:11:13 +02:00
|
|
|
else if (tuple->t_infomask & HEAP_MOVED_IN)
|
|
|
|
{
|
2003-06-25 03:08:13 +02:00
|
|
|
TransactionId xvac = HeapTupleHeaderGetXvac(tuple);
|
2003-08-04 02:43:34 +02:00
|
|
|
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdIsInProgress(xvac))
|
2001-12-19 18:18:39 +01:00
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
2003-06-25 03:08:13 +02:00
|
|
|
if (TransactionIdDidCommit(xvac))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
|
|
|
InvalidTransactionId);
|
2001-12-19 18:18:39 +01:00
|
|
|
else
|
2001-07-12 06:11:13 +02:00
|
|
|
{
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_DEAD;
|
|
|
|
}
|
|
|
|
}
|
Fix longstanding bug in HeapTupleSatisfiesVacuum().
HeapTupleSatisfiesVacuum() didn't properly discern between
DELETE_IN_PROGRESS and INSERT_IN_PROGRESS for rows that have been
inserted in the current transaction and deleted in a aborted
subtransaction of the current backend. At the very least that caused
problems for CLUSTER and CREATE INDEX in transactions that had
aborting subtransactions producing rows, leading to warnings like:
WARNING: concurrent delete in progress within table "..."
possibly in an endless, uninterruptible, loop.
Instead of treating *InProgress xmins the same as *IsCurrent ones,
treat them as being distinct like the other visibility routines. As
implemented this separatation can cause a behaviour change for rows
that have been inserted and deleted in another, still running,
transaction. HTSV will now return INSERT_IN_PROGRESS instead of
DELETE_IN_PROGRESS for those. That's both, more in line with the other
visibility routines and arguably more correct. The latter because a
INSERT_IN_PROGRESS will make callers look at/wait for xmin, instead of
xmax.
The only current caller where that's possibly worse than the old
behaviour is heap_prune_chain() which now won't mark the page as
prunable if a row has concurrently been inserted and deleted. That's
harmless enough.
As a cautionary measure also insert a interrupt check before the gotos
in IndexBuildHeapScan() that lead to the uninterruptible loop. There
are other possible causes, like a row that several sessions try to
update and all fail, for repeated loops and the cost of doing so in
the retry case is low.
As this bug goes back all the way to the introduction of
subtransactions in 573a71a5da backpatch to all supported releases.
Reported-By: Sandro Santilli
2014-06-04 21:36:19 +02:00
|
|
|
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
|
2003-09-22 02:47:23 +02:00
|
|
|
{
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
|
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
2013-07-20 00:35:07 +02:00
|
|
|
/* only locked? run infomask-only check first, for performance */
|
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) ||
|
|
|
|
HeapTupleHeaderIsOnlyLocked(tuple))
|
2003-09-22 02:47:23 +02:00
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
|
|
|
/* inserted and then deleted by same xact */
|
Fix longstanding bug in HeapTupleSatisfiesVacuum().
HeapTupleSatisfiesVacuum() didn't properly discern between
DELETE_IN_PROGRESS and INSERT_IN_PROGRESS for rows that have been
inserted in the current transaction and deleted in a aborted
subtransaction of the current backend. At the very least that caused
problems for CLUSTER and CREATE INDEX in transactions that had
aborting subtransactions producing rows, leading to warnings like:
WARNING: concurrent delete in progress within table "..."
possibly in an endless, uninterruptible, loop.
Instead of treating *InProgress xmins the same as *IsCurrent ones,
treat them as being distinct like the other visibility routines. As
implemented this separatation can cause a behaviour change for rows
that have been inserted and deleted in another, still running,
transaction. HTSV will now return INSERT_IN_PROGRESS instead of
DELETE_IN_PROGRESS for those. That's both, more in line with the other
visibility routines and arguably more correct. The latter because a
INSERT_IN_PROGRESS will make callers look at/wait for xmin, instead of
xmax.
The only current caller where that's possibly worse than the old
behaviour is heap_prune_chain() which now won't mark the page as
prunable if a row has concurrently been inserted and deleted. That's
harmless enough.
As a cautionary measure also insert a interrupt check before the gotos
in IndexBuildHeapScan() that lead to the uninterruptible loop. There
are other possible causes, like a row that several sessions try to
update and all fail, for repeated loops and the cost of doing so in
the retry case is low.
As this bug goes back all the way to the introduction of
subtransactions in 573a71a5da backpatch to all supported releases.
Reported-By: Sandro Santilli
2014-06-04 21:36:19 +02:00
|
|
|
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple)))
|
|
|
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
|
|
|
/* deleting subtransaction must have aborted */
|
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
|
|
|
}
|
|
|
|
else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* It'd be possible to discern between INSERT/DELETE in progress
|
|
|
|
* here by looking at xmax - but that doesn't seem beneficial for
|
|
|
|
* the majority of callers and even detrimental for some. We'd
|
|
|
|
* rather have callers look at/wait for xmin than xmax. It's
|
|
|
|
* always correct to return INSERT_IN_PROGRESS because that's
|
|
|
|
* what's happening from the view of other backends.
|
|
|
|
*/
|
|
|
|
return HEAPTUPLE_INSERT_IN_PROGRESS;
|
2003-09-22 02:47:23 +02:00
|
|
|
}
|
2013-12-22 21:49:09 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
|
2013-12-22 21:49:09 +01:00
|
|
|
HeapTupleHeaderGetRawXmin(tuple));
|
2001-07-12 06:11:13 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Not in Progress, Not Committed, so either Aborted or crashed
|
2001-07-12 06:11:13 +02:00
|
|
|
*/
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_DEAD;
|
|
|
|
}
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2007-08-02 00:45:09 +02:00
|
|
|
/*
|
|
|
|
* At this point the xmin is known committed, but we might not have
|
2007-11-15 22:14:46 +01:00
|
|
|
* been able to set the hint bit yet; so we can no longer Assert that
|
|
|
|
* it's set.
|
2007-08-02 00:45:09 +02:00
|
|
|
*/
|
2001-07-12 06:11:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Okay, the inserter committed, so it was good at some point. Now what
|
2005-10-15 04:49:52 +02:00
|
|
|
* about the deleting transaction?
|
2001-07-12 06:11:13 +02:00
|
|
|
*/
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID)
|
|
|
|
return HEAPTUPLE_LIVE;
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
2001-12-19 18:18:39 +01:00
|
|
|
{
|
2002-01-11 21:07:03 +01:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* "Deleting" xact really only locked it, so the tuple is live in any
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
* case. However, we should make sure that either XMAX_COMMITTED or
|
2007-11-15 22:14:46 +01:00
|
|
|
* XMAX_INVALID gets set once the xact is gone, to reduce the costs of
|
|
|
|
* examining the tuple for future xacts. Also, marking dead
|
Fix recently-understood problems with handling of XID freezing, particularly
in PITR scenarios. We now WAL-log the replacement of old XIDs with
FrozenTransactionId, so that such replacement is guaranteed to propagate to
PITR slave databases. Also, rather than relying on hint-bit updates to be
preserved, pg_clog is not truncated until all instances of an XID are known to
have been replaced by FrozenTransactionId. Add new GUC variables and
pg_autovacuum columns to allow management of the freezing policy, so that
users can trade off the size of pg_clog against the amount of freezing work
done. Revise the already-existing code that forces autovacuum of tables
approaching the wraparound point to make it more bulletproof; also, revise the
autovacuum logic so that anti-wraparound vacuuming is done per-table rather
than per-database. initdb forced because of changes in pg_class, pg_database,
and pg_autovacuum catalogs. Heikki Linnakangas, Simon Riggs, and Tom Lane.
2006-11-05 23:42:10 +01:00
|
|
|
* MultiXacts as invalid here provides defense against MultiXactId
|
|
|
|
* wraparound (see also comments in heap_freeze_tuple()).
|
2002-01-11 21:07:03 +01:00
|
|
|
*/
|
|
|
|
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
|
|
|
{
|
2005-04-28 23:47:18 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
/*
|
|
|
|
* If it's only locked but neither EXCL_LOCK nor KEYSHR_LOCK
|
|
|
|
* are set, it cannot possibly be running; otherwise have to
|
|
|
|
* check.
|
|
|
|
*/
|
|
|
|
if ((tuple->t_infomask & (HEAP_XMAX_EXCL_LOCK |
|
|
|
|
HEAP_XMAX_KEYSHR_LOCK)) &&
|
2014-07-29 21:41:06 +02:00
|
|
|
MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple),
|
|
|
|
true))
|
2005-04-28 23:47:18 +02:00
|
|
|
return HEAPTUPLE_LIVE;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
|
|
|
|
|
2005-04-28 23:47:18 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
2005-04-28 23:47:18 +02:00
|
|
|
return HEAPTUPLE_LIVE;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2005-04-28 23:47:18 +02:00
|
|
|
}
|
2002-01-11 21:07:03 +01:00
|
|
|
}
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* We don't really care whether xmax did commit, abort or crash. We
|
|
|
|
* know that xmax did lock the tuple, but it did not and will never
|
|
|
|
* actually update it.
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
*/
|
|
|
|
|
2001-12-19 18:18:39 +01:00
|
|
|
return HEAPTUPLE_LIVE;
|
|
|
|
}
|
|
|
|
|
2005-04-28 23:47:18 +02:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
TransactionId xmax;
|
|
|
|
|
2014-07-29 21:41:06 +02:00
|
|
|
if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false))
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
{
|
|
|
|
/* already checked above */
|
|
|
|
Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(xmax))
|
|
|
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
|
|
|
else if (TransactionIdDidCommit(xmax))
|
|
|
|
/* there are still lockers around -- can't return DEAD here */
|
|
|
|
return HEAPTUPLE_RECENTLY_DEAD;
|
|
|
|
/* updating transaction aborted */
|
|
|
|
return HEAPTUPLE_LIVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED));
|
|
|
|
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
|
|
|
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
/* multi is not running -- updating xact cannot be */
|
|
|
|
Assert(!TransactionIdIsInProgress(xmax));
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
{
|
|
|
|
if (!TransactionIdPrecedes(xmax, OldestXmin))
|
|
|
|
return HEAPTUPLE_RECENTLY_DEAD;
|
|
|
|
else
|
|
|
|
return HEAPTUPLE_DEAD;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2013-11-29 20:08:06 +01:00
|
|
|
* Not in Progress, Not Committed, so either Aborted or crashed.
|
|
|
|
* Remove the Xmax.
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
*/
|
2013-11-29 20:08:06 +01:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
|
|
|
|
return HEAPTUPLE_LIVE;
|
2005-04-28 23:47:18 +02:00
|
|
|
}
|
|
|
|
|
2001-07-12 06:11:13 +02:00
|
|
|
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
|
|
|
{
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
|
2001-07-17 00:43:34 +02:00
|
|
|
return HEAPTUPLE_DELETE_IN_PROGRESS;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
HeapTupleHeaderGetRawXmax(tuple));
|
2001-07-12 06:11:13 +02:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Not in Progress, Not Committed, so either Aborted or crashed
|
2001-07-12 06:11:13 +02:00
|
|
|
*/
|
2007-08-14 19:35:18 +02:00
|
|
|
SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
|
|
|
|
InvalidTransactionId);
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_LIVE;
|
|
|
|
}
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2007-08-02 00:45:09 +02:00
|
|
|
/*
|
|
|
|
* At this point the xmax is known committed, but we might not have
|
2007-11-15 22:14:46 +01:00
|
|
|
* been able to set the hint bit yet; so we can no longer Assert that
|
|
|
|
* it's set.
|
2007-08-02 00:45:09 +02:00
|
|
|
*/
|
2001-07-12 06:11:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2011-04-29 22:29:42 +02:00
|
|
|
* Deleter committed, but perhaps it was recent enough that some open
|
|
|
|
* transactions could still see the tuple.
|
2001-07-12 06:11:13 +02:00
|
|
|
*/
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin))
|
2001-07-12 06:11:13 +02:00
|
|
|
return HEAPTUPLE_RECENTLY_DEAD;
|
|
|
|
|
|
|
|
/* Otherwise, it's dead and removable */
|
|
|
|
return HEAPTUPLE_DEAD;
|
|
|
|
}
|
|
|
|
|
2012-05-02 18:40:07 +02:00
|
|
|
/*
|
|
|
|
* HeapTupleIsSurelyDead
|
|
|
|
*
|
|
|
|
* Determine whether a tuple is surely dead. We sometimes use this
|
|
|
|
* in lieu of HeapTupleSatisifesVacuum when the tuple has just been
|
|
|
|
* tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
|
|
|
|
* can be set should already be set. We assume that if no hint bits
|
2014-05-06 18:12:18 +02:00
|
|
|
* either for xmin or xmax, the transaction is still running. This is
|
2012-05-02 18:40:07 +02:00
|
|
|
* therefore faster than HeapTupleSatisfiesVacuum, because we don't
|
|
|
|
* consult CLOG (and also because we don't need to give an exact answer,
|
|
|
|
* just whether or not the tuple is surely dead).
|
|
|
|
*/
|
|
|
|
bool
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
|
2012-05-02 18:40:07 +02:00
|
|
|
{
|
2013-07-22 19:26:33 +02:00
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
2013-07-22 19:26:33 +02:00
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
2012-05-02 18:40:07 +02:00
|
|
|
/*
|
2012-06-10 21:20:04 +02:00
|
|
|
* If the inserting transaction is marked invalid, then it aborted, and
|
|
|
|
* the tuple is definitely dead. If it's marked neither committed nor
|
|
|
|
* invalid, then we assume it's still alive (since the presumption is that
|
|
|
|
* all relevant hint bits were just set moments ago).
|
2012-05-02 18:40:07 +02:00
|
|
|
*/
|
2013-12-22 21:49:09 +01:00
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple))
|
|
|
|
return HeapTupleHeaderXminInvalid(tuple) ? true : false;
|
2012-05-02 18:40:07 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If the inserting transaction committed, but any deleting transaction
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
* aborted, the tuple is still alive.
|
2012-05-02 18:40:07 +02:00
|
|
|
*/
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the XMAX is just a lock, the tuple is still alive.
|
|
|
|
*/
|
|
|
|
if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the Xmax is a MultiXact, it might be dead or alive, but we cannot
|
|
|
|
* know without checking pg_multixact.
|
|
|
|
*/
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
2012-05-02 18:40:07 +02:00
|
|
|
return false;
|
|
|
|
|
|
|
|
/* If deleter isn't known to have committed, assume it's still running. */
|
|
|
|
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* Deleter committed, so tuple is dead if the XID is old enough. */
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
|
2012-05-02 18:40:07 +02:00
|
|
|
}
|
2001-07-12 06:11:13 +02:00
|
|
|
|
2006-09-03 17:59:39 +02:00
|
|
|
/*
|
2007-03-25 21:45:14 +02:00
|
|
|
* XidInMVCCSnapshot
|
2006-09-03 17:59:39 +02:00
|
|
|
* Is the given XID still-in-progress according to the snapshot?
|
|
|
|
*
|
|
|
|
* Note: GetSnapshotData never stores either top xid or subxids of our own
|
|
|
|
* backend into a snapshot, so these xids will not be reported as "running"
|
|
|
|
* by this function. This is OK for current uses, because we actually only
|
|
|
|
* apply this for known-committed XIDs.
|
|
|
|
*/
|
|
|
|
static bool
|
2007-03-25 21:45:14 +02:00
|
|
|
XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
|
2006-09-03 17:59:39 +02:00
|
|
|
{
|
|
|
|
uint32 i;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make a quick range check to eliminate most XIDs without looking at the
|
2014-05-06 18:12:18 +02:00
|
|
|
* xip arrays. Note that this is OK even if we convert a subxact XID to
|
2006-09-03 17:59:39 +02:00
|
|
|
* its parent below, because a subxact with XID < xmin has surely also got
|
|
|
|
* a parent with XID < xmin, while one with XID >= xmax must belong to a
|
|
|
|
* parent that was not yet committed at the time of this snapshot.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Any xid < xmin is not in-progress */
|
|
|
|
if (TransactionIdPrecedes(xid, snapshot->xmin))
|
|
|
|
return false;
|
|
|
|
/* Any xid >= xmax is in-progress */
|
|
|
|
if (TransactionIdFollowsOrEquals(xid, snapshot->xmax))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* Snapshot information is stored slightly differently in snapshots taken
|
|
|
|
* during recovery.
|
2006-09-03 17:59:39 +02:00
|
|
|
*/
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
if (!snapshot->takenDuringRecovery)
|
2006-09-03 17:59:39 +02:00
|
|
|
{
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If the snapshot contains full subxact data, the fastest way to
|
|
|
|
* check things is just to compare the given XID against both subxact
|
|
|
|
* XIDs and top-level XIDs. If the snapshot overflowed, we have to
|
|
|
|
* use pg_subtrans to convert a subxact XID to its parent XID, but
|
|
|
|
* then we need only look at top-level XIDs not subxacts.
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
*/
|
|
|
|
if (!snapshot->suboverflowed)
|
|
|
|
{
|
|
|
|
/* full data, so search subxip */
|
|
|
|
int32 j;
|
2006-09-03 17:59:39 +02:00
|
|
|
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
for (j = 0; j < snapshot->subxcnt; j++)
|
|
|
|
{
|
|
|
|
if (TransactionIdEquals(xid, snapshot->subxip[j]))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* not there, fall through to search xip[] */
|
|
|
|
}
|
|
|
|
else
|
2006-09-03 17:59:39 +02:00
|
|
|
{
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/* overflowed, so convert xid to top-level */
|
|
|
|
xid = SubTransGetTopmostTransaction(xid);
|
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If xid was indeed a subxact, we might now have an xid < xmin,
|
|
|
|
* so recheck to avoid an array scan. No point in rechecking
|
|
|
|
* xmax.
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
*/
|
|
|
|
if (TransactionIdPrecedes(xid, snapshot->xmin))
|
|
|
|
return false;
|
2006-09-03 17:59:39 +02:00
|
|
|
}
|
|
|
|
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
for (i = 0; i < snapshot->xcnt; i++)
|
|
|
|
{
|
|
|
|
if (TransactionIdEquals(xid, snapshot->xip[i]))
|
|
|
|
return true;
|
|
|
|
}
|
2006-09-03 17:59:39 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
int32 j;
|
2006-09-03 17:59:39 +02:00
|
|
|
|
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* In recovery we store all xids in the subxact array because it is by
|
|
|
|
* far the bigger array, and we mostly don't know which xids are
|
|
|
|
* top-level and which are subxacts. The xip array is empty.
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
*
|
|
|
|
* We start by searching subtrans, if we overflowed.
|
2006-09-03 17:59:39 +02:00
|
|
|
*/
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
if (snapshot->suboverflowed)
|
|
|
|
{
|
|
|
|
/* overflowed, so convert xid to top-level */
|
|
|
|
xid = SubTransGetTopmostTransaction(xid);
|
2006-09-03 17:59:39 +02:00
|
|
|
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/*
|
2010-02-26 03:01:40 +01:00
|
|
|
* If xid was indeed a subxact, we might now have an xid < xmin,
|
|
|
|
* so recheck to avoid an array scan. No point in rechecking
|
|
|
|
* xmax.
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
*/
|
|
|
|
if (TransactionIdPrecedes(xid, snapshot->xmin))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We now have either a top-level xid higher than xmin or an
|
|
|
|
* indeterminate xid. We don't know whether it's top level or subxact
|
|
|
|
* but it doesn't matter. If it's present, the xid is visible.
|
|
|
|
*/
|
|
|
|
for (j = 0; j < snapshot->subxcnt; j++)
|
|
|
|
{
|
|
|
|
if (TransactionIdEquals(xid, snapshot->subxip[j]))
|
|
|
|
return true;
|
|
|
|
}
|
2006-09-03 17:59:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Is the tuple really only locked? That is, is it not updated?
|
|
|
|
*
|
|
|
|
* It's easy to check just infomask bits if the locker is not a multi; but
|
|
|
|
* otherwise we need to verify that the updating transaction has not aborted.
|
|
|
|
*
|
|
|
|
* This function is here because it follows the same time qualification rules
|
|
|
|
* laid out at the top of this file.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple)
|
|
|
|
{
|
2013-05-29 22:58:43 +02:00
|
|
|
TransactionId xmax;
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
/* if there's no valid Xmax, then there's obviously no update either */
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_LOCK_ONLY)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/* invalid xmax means no update */
|
|
|
|
if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple)))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/*
|
2013-05-29 22:58:43 +02:00
|
|
|
* if HEAP_XMAX_LOCK_ONLY is not set and not a multi, then this must
|
|
|
|
* necessarily have been updated
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
*/
|
|
|
|
if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* ... but if it's a multi, then perhaps the updating Xid aborted. */
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
2013-11-29 20:08:06 +01:00
|
|
|
|
|
|
|
/* not LOCKED_ONLY, so it has to have an xmax */
|
|
|
|
Assert(TransactionIdIsValid(xmax));
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
|
|
|
|
if (TransactionIdIsCurrentTransactionId(xmax))
|
|
|
|
return false;
|
|
|
|
if (TransactionIdIsInProgress(xmax))
|
|
|
|
return false;
|
|
|
|
if (TransactionIdDidCommit(xmax))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* not current, not in progress, not committed -- must have aborted or
|
|
|
|
* crashed
|
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
}
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
/*
|
2014-03-17 12:47:28 +01:00
|
|
|
* check whether the transaciont id 'xid' is in the pre-sorted array 'xip'.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
TransactionIdInArray(TransactionId xid, TransactionId *xip, Size num)
|
|
|
|
{
|
|
|
|
return bsearch(&xid, xip, num,
|
|
|
|
sizeof(TransactionId), xidComparator) != NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* See the comments for HeapTupleSatisfiesMVCC for the semantics this function
|
|
|
|
* obeys.
|
|
|
|
*
|
|
|
|
* Only usable on tuples from catalog tables!
|
|
|
|
*
|
|
|
|
* We don't need to support HEAP_MOVED_(IN|OFF) for now because we only support
|
|
|
|
* reading catalog pages which couldn't have been created in an older version.
|
|
|
|
*
|
|
|
|
* We don't set any hint bits in here as it seems unlikely to be beneficial as
|
|
|
|
* those should already be set by normal access and it seems to be too
|
|
|
|
* dangerous to do so as the semantics of doing so during timetravel are more
|
|
|
|
* complicated than when dealing "only" with the present.
|
|
|
|
*/
|
|
|
|
bool
|
|
|
|
HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot,
|
2014-05-06 18:12:18 +02:00
|
|
|
Buffer buffer)
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
{
|
|
|
|
HeapTupleHeader tuple = htup->t_data;
|
|
|
|
TransactionId xmin = HeapTupleHeaderGetXmin(tuple);
|
|
|
|
TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple);
|
|
|
|
|
|
|
|
Assert(ItemPointerIsValid(&htup->t_self));
|
|
|
|
Assert(htup->t_tableOid != InvalidOid);
|
|
|
|
|
|
|
|
/* inserting transaction aborted */
|
|
|
|
if (HeapTupleHeaderXminInvalid(tuple))
|
|
|
|
{
|
|
|
|
Assert(!TransactionIdDidCommit(xmin));
|
|
|
|
return false;
|
|
|
|
}
|
2014-03-17 12:47:28 +01:00
|
|
|
/* check if it's one of our txids, toplevel is also in there */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
else if (TransactionIdInArray(xmin, snapshot->subxip, snapshot->subxcnt))
|
|
|
|
{
|
|
|
|
bool resolved;
|
|
|
|
CommandId cmin = HeapTupleHeaderGetRawCommandId(tuple);
|
|
|
|
CommandId cmax = InvalidCommandId;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* another transaction might have (tried to) delete this tuple or
|
2014-05-06 18:12:18 +02:00
|
|
|
* cmin/cmax was stored in a combocid. So we need to lookup the actual
|
|
|
|
* values externally.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
|
|
|
resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
|
|
|
|
htup, buffer,
|
|
|
|
&cmin, &cmax);
|
|
|
|
|
|
|
|
if (!resolved)
|
|
|
|
elog(ERROR, "could not resolve cmin/cmax of catalog tuple");
|
|
|
|
|
|
|
|
Assert(cmin != InvalidCommandId);
|
|
|
|
|
|
|
|
if (cmin >= snapshot->curcid)
|
2014-05-06 18:12:18 +02:00
|
|
|
return false; /* inserted after scan started */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
/* fall through */
|
|
|
|
}
|
|
|
|
/* committed before our xmin horizon. Do a normal visibility check. */
|
|
|
|
else if (TransactionIdPrecedes(xmin, snapshot->xmin))
|
|
|
|
{
|
|
|
|
Assert(!(HeapTupleHeaderXminCommitted(tuple) &&
|
|
|
|
!TransactionIdDidCommit(xmin)));
|
|
|
|
|
|
|
|
/* check for hint bit first, consult clog afterwards */
|
|
|
|
if (!HeapTupleHeaderXminCommitted(tuple) &&
|
|
|
|
!TransactionIdDidCommit(xmin))
|
|
|
|
return false;
|
|
|
|
/* fall through */
|
|
|
|
}
|
|
|
|
/* beyond our xmax horizon, i.e. invisible */
|
|
|
|
else if (TransactionIdFollowsOrEquals(xmin, snapshot->xmax))
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
/* check if it's a committed transaction in [xmin, xmax) */
|
2014-05-06 18:12:18 +02:00
|
|
|
else if (TransactionIdInArray(xmin, snapshot->xip, snapshot->xcnt))
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
{
|
|
|
|
/* fall through */
|
|
|
|
}
|
2014-05-06 18:12:18 +02:00
|
|
|
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* none of the above, i.e. between [xmin, xmax) but hasn't committed. I.e.
|
|
|
|
* invisible.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
|
|
|
else
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* at this point we know xmin is visible, go on to check xmax */
|
|
|
|
|
|
|
|
/* xid invalid or aborted */
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_INVALID)
|
|
|
|
return true;
|
|
|
|
/* locked tuples are always visible */
|
|
|
|
else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
|
|
|
|
return true;
|
2014-05-06 18:12:18 +02:00
|
|
|
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* We can see multis here if we're looking at user tables or if somebody
|
|
|
|
* SELECT ... FOR SHARE/UPDATE a system table.
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
*/
|
|
|
|
else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
|
|
|
|
{
|
|
|
|
xmax = HeapTupleGetUpdateXid(tuple);
|
|
|
|
}
|
|
|
|
|
2014-03-17 12:47:28 +01:00
|
|
|
/* check if it's one of our txids, toplevel is also in there */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
if (TransactionIdInArray(xmax, snapshot->subxip, snapshot->subxcnt))
|
|
|
|
{
|
2014-05-06 18:12:18 +02:00
|
|
|
bool resolved;
|
|
|
|
CommandId cmin;
|
|
|
|
CommandId cmax = HeapTupleHeaderGetRawCommandId(tuple);
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
|
|
|
|
/* Lookup actual cmin/cmax values */
|
|
|
|
resolved = ResolveCminCmaxDuringDecoding(HistoricSnapshotGetTupleCids(), snapshot,
|
|
|
|
htup, buffer,
|
|
|
|
&cmin, &cmax);
|
|
|
|
|
|
|
|
if (!resolved)
|
|
|
|
elog(ERROR, "could not resolve combocid to cmax");
|
|
|
|
|
|
|
|
Assert(cmax != InvalidCommandId);
|
|
|
|
|
|
|
|
if (cmax >= snapshot->curcid)
|
2014-05-06 18:12:18 +02:00
|
|
|
return true; /* deleted after scan started */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
else
|
2014-05-06 18:12:18 +02:00
|
|
|
return false; /* deleted before scan started */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the effected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Gheogegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 22:32:18 +01:00
|
|
|
}
|
|
|
|
/* below xmin horizon, normal transaction state is valid */
|
|
|
|
else if (TransactionIdPrecedes(xmax, snapshot->xmin))
|
|
|
|
{
|
|
|
|
Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED &&
|
|
|
|
!TransactionIdDidCommit(xmax)));
|
|
|
|
|
|
|
|
/* check hint bit first */
|
|
|
|
if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* check clog */
|
|
|
|
return !TransactionIdDidCommit(xmax);
|
|
|
|
}
|
|
|
|
/* above xmax horizon, we cannot possibly see the deleting transaction */
|
|
|
|
else if (TransactionIdFollowsOrEquals(xmax, snapshot->xmax))
|
|
|
|
return true;
|
|
|
|
/* xmax is between [xmin, xmax), check known committed array */
|
|
|
|
else if (TransactionIdInArray(xmax, snapshot->xip, snapshot->xcnt))
|
|
|
|
return false;
|
|
|
|
/* xmax is between [xmin, xmax), but known not to have committed yet */
|
|
|
|
else
|
|
|
|
return true;
|
|
|
|
}
|