1996-08-28 03:59:28 +02:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-14 00:22:53 +01:00
|
|
|
* lock.h
|
2000-12-22 01:51:54 +01:00
|
|
|
* POSTGRES low-level lock mechanism
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
*
|
2020-01-01 18:21:45 +01:00
|
|
|
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
|
2000-01-26 06:58:53 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/storage/lock.h
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef LOCK_H_
|
|
|
|
#define LOCK_H_
|
|
|
|
|
2015-08-07 15:10:56 +02:00
|
|
|
#ifdef FRONTEND
|
|
|
|
#error "lock.h may not be included from frontend code"
|
|
|
|
#endif
|
|
|
|
|
2007-09-05 20:10:48 +02:00
|
|
|
#include "storage/backendid.h"
|
2019-11-25 03:38:57 +01:00
|
|
|
#include "storage/lockdefs.h"
|
2001-09-29 06:02:27 +02:00
|
|
|
#include "storage/lwlock.h"
|
1999-07-16 19:07:40 +02:00
|
|
|
#include "storage/shmem.h"
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2006-07-23 05:07:58 +02:00
|
|
|
/*
 * struct PGPROC is declared in proc.h, but we must forward-reference it
 * here.  Only the tag is introduced; the struct stays incomplete in this
 * header.
 */
typedef struct PGPROC PGPROC;
|
2005-12-11 22:02:18 +01:00
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
typedef struct PROC_QUEUE
|
|
|
|
{
|
2002-06-11 15:40:53 +02:00
|
|
|
SHM_QUEUE links; /* head of list of PGPROC objects */
|
2001-01-22 23:30:06 +01:00
|
|
|
int size; /* number of entries in list */
|
2000-12-22 01:51:54 +01:00
|
|
|
} PROC_QUEUE;
|
|
|
|
|
2004-08-26 19:22:28 +02:00
|
|
|
/* GUC variables */
extern int	max_locks_per_xact;

/* The following variables are declared only when LOCK_DEBUG is defined */
#ifdef LOCK_DEBUG
extern int	Trace_lock_oidmin;
extern bool Trace_locks;
extern bool Trace_userlocks;
extern int	Trace_lock_table;
extern bool Debug_deadlocks;
#endif							/* LOCK_DEBUG */
|
2000-05-31 02:28:42 +02:00
|
|
|
|
|
|
|
|
2007-09-05 20:10:48 +02:00
|
|
|
/*
|
|
|
|
* Top-level transactions are identified by VirtualTransactionIDs comprising
|
|
|
|
* the BackendId of the backend running the xact, plus a locally-assigned
|
2014-05-06 18:12:18 +02:00
|
|
|
* LocalTransactionId. These are guaranteed unique over the short term,
|
2007-09-05 20:10:48 +02:00
|
|
|
* but will be reused after a database restart; hence they should never
|
|
|
|
* be stored on disk.
|
|
|
|
*
|
|
|
|
* Note that struct VirtualTransactionId can not be assumed to be atomically
|
|
|
|
* assignable as a whole. However, type LocalTransactionId is assumed to
|
|
|
|
* be atomically assignable, and the backend ID doesn't change often enough
|
|
|
|
* to be a problem, so we can fetch or assign the two fields separately.
|
|
|
|
* We deliberately refrain from using the struct within PGPROC, to prevent
|
|
|
|
* coding errors from trying to use struct assignment with it; instead use
|
|
|
|
* GET_VXID_FROM_PGPROC().
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
BackendId backendId; /* determined at backend startup */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
LocalTransactionId localTransactionId; /* backend-local transaction id */
|
2007-11-15 23:25:18 +01:00
|
|
|
} VirtualTransactionId;
|
2007-09-05 20:10:48 +02:00
|
|
|
|
|
|
|
/*
 * InvalidLocalTransactionId is the LocalTransactionId value (zero) that is
 * treated as "not valid"; LocalTransactionIdIsValid() is true for any
 * other value.
 */
#define InvalidLocalTransactionId		0
#define LocalTransactionIdIsValid(lxid) ((lxid) != InvalidLocalTransactionId)
|
2007-09-05 20:10:48 +02:00
|
|
|
/*
 * A VirtualTransactionId is valid only when both of its fields are: the
 * backendId must differ from InvalidBackendId and the localTransactionId
 * must satisfy LocalTransactionIdIsValid().  Note multiple eval of vxid!
 */
#define VirtualTransactionIdIsValid(vxid) \
	(((vxid).backendId != InvalidBackendId) && \
	 LocalTransactionIdIsValid((vxid).localTransactionId))
|
2009-04-04 19:40:36 +02:00
|
|
|
/*
 * Two VirtualTransactionIds are equal when both the backendId and the
 * localTransactionId fields match.  Note multiple eval of both arguments!
 */
#define VirtualTransactionIdEquals(vxid1, vxid2) \
	((vxid1).backendId == (vxid2).backendId && \
	 (vxid1).localTransactionId == (vxid2).localTransactionId)
|
|
|
|
/*
 * Reset both fields of vxid to their invalid values (InvalidBackendId and
 * InvalidLocalTransactionId).  The fields are assigned one at a time
 * rather than by struct assignment.  Note multiple eval of vxid!
 */
#define SetInvalidVirtualTransactionId(vxid) \
	((vxid).backendId = InvalidBackendId, \
	 (vxid).localTransactionId = InvalidLocalTransactionId)
|
2007-09-05 20:10:48 +02:00
|
|
|
/*
 * Fill vxid from a proc struct: backendId is copied from proc.backendId and
 * localTransactionId from proc.lxid, as two separate field assignments.
 * Both arguments are struct lvalues (not pointers) and are evaluated twice.
 */
#define GET_VXID_FROM_PGPROC(vxid, proc) \
	((vxid).backendId = (proc).backendId, \
	 (vxid).localTransactionId = (proc).lxid)
|
|
|
|
|
2001-01-22 23:30:06 +01:00
|
|
|
/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */
#define MAX_LOCKMODES		10
|
1996-08-28 03:59:28 +02:00
|
|
|
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
/*
 * LOCKBIT_ON yields a mask with only the bit for lockmode set; LOCKBIT_OFF
 * yields its complement (every bit set except that one).
 */
#define LOCKBIT_ON(lockmode) (1 << (lockmode))
#define LOCKBIT_OFF(lockmode) (~(1 << (lockmode)))
|
|
|
|
|
2004-08-27 19:07:42 +02:00
|
|
|
|
2001-06-22 02:04:59 +02:00
|
|
|
/*
|
2005-12-09 02:22:04 +01:00
|
|
|
* This data structure defines the locking semantics associated with a
|
|
|
|
* "lock method". The semantics specify the meaning of each lock mode
|
Overdue code review for transaction-level advisory locks patch.
Commit 62c7bd31c8878dd45c9b9b2429ab7a12103f3590 had assorted problems, most
visibly that it broke PREPARE TRANSACTION in the presence of session-level
advisory locks (which should be ignored by PREPARE), as per a recent
complaint from Stephen Rees. More abstractly, the patch made the
LockMethodData.transactional flag not merely useless but outright
dangerous, because in point of fact that flag no longer tells you anything
at all about whether a lock is held transactionally. This fix therefore
removes that flag altogether. We now rely entirely on the convention
already in use in lock.c that transactional lock holds must be owned by
some ResourceOwner, while session holds are never so owned. Setting the
locallock struct's owner link to NULL thus denotes a session hold, and
there is no redundant marker for that.
PREPARE TRANSACTION now works again when there are session-level advisory
locks, and it is also able to transfer transactional advisory locks to the
prepared transaction, but for implementation reasons it throws an error if
we hold both types of lock on a single lockable object. Perhaps it will be
worth improving that someday.
Assorted other minor cleanup and documentation editing, as well.
Back-patch to 9.1, except that in the 9.1 branch I did not remove the
LockMethodData.transactional flag for fear of causing an ABI break for
any external code that might be examining those structs.
2012-05-04 23:43:27 +02:00
|
|
|
* (by defining which lock modes it conflicts with).
|
2005-12-09 02:22:04 +01:00
|
|
|
* All of this data is constant and is kept in const tables.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2005-12-09 02:22:04 +01:00
|
|
|
* numLockModes -- number of lock modes (READ,WRITE,etc) that
|
|
|
|
* are defined in this lock method. Must be less than MAX_LOCKMODES.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
* conflictTab -- this is an array of bitmasks showing lock
|
2005-12-09 02:22:04 +01:00
|
|
|
* mode conflicts. conflictTab[i] is a mask with the j-th bit
|
|
|
|
* turned on if lock modes i and j conflict. Lock modes are
|
|
|
|
* numbered 1..numLockModes; conflictTab[0] is unused.
|
|
|
|
*
|
|
|
|
* lockModeNames -- ID strings for debug printouts.
|
|
|
|
*
|
Overdue code review for transaction-level advisory locks patch.
Commit 62c7bd31c8878dd45c9b9b2429ab7a12103f3590 had assorted problems, most
visibly that it broke PREPARE TRANSACTION in the presence of session-level
advisory locks (which should be ignored by PREPARE), as per a recent
complaint from Stephen Rees. More abstractly, the patch made the
LockMethodData.transactional flag not merely useless but outright
dangerous, because in point of fact that flag no longer tells you anything
at all about whether a lock is held transactionally. This fix therefore
removes that flag altogether. We now rely entirely on the convention
already in use in lock.c that transactional lock holds must be owned by
some ResourceOwner, while session holds are never so owned. Setting the
locallock struct's owner link to NULL thus denotes a session hold, and
there is no redundant marker for that.
PREPARE TRANSACTION now works again when there are session-level advisory
locks, and it is also able to transfer transactional advisory locks to the
prepared transaction, but for implementation reasons it throws an error if
we hold both types of lock on a single lockable object. Perhaps it will be
worth improving that someday.
Assorted other minor cleanup and documentation editing, as well.
Back-patch to 9.1, except that in the 9.1 branch I did not remove the
LockMethodData.transactional flag for fear of causing an ABI break for
any external code that might be examining those structs.
2012-05-04 23:43:27 +02:00
|
|
|
* trace_flag -- pointer to GUC trace flag for this lock method. (The
|
|
|
|
* GUC variable is not constant, but we use "const" here to denote that
|
|
|
|
* it can't be changed through this reference.)
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
typedef struct LockMethodData
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2004-08-29 07:07:03 +02:00
|
|
|
int numLockModes;
|
2005-12-09 02:22:04 +01:00
|
|
|
const LOCKMASK *conflictTab;
|
2017-06-21 20:39:04 +02:00
|
|
|
const char *const *lockModeNames;
|
2005-12-09 02:22:04 +01:00
|
|
|
const bool *trace_flag;
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
} LockMethodData;
|
2004-08-27 19:07:42 +02:00
|
|
|
|
2005-12-09 02:22:04 +01:00
|
|
|
/* LockMethod is a pointer through which a LockMethodData is read-only */
typedef const LockMethodData *LockMethod;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock methods are identified by LOCKMETHODID. (Despite the declaration as
|
|
|
|
* uint16, we are constrained to 256 lockmethods by the layout of LOCKTAG.)
|
|
|
|
*/
|
|
|
|
typedef uint16 LOCKMETHODID;
|
|
|
|
|
|
|
|
/* These identify the known lock methods */
#define DEFAULT_LOCKMETHOD	1
#define USER_LOCKMETHOD		2
|
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
/*
 * LOCKTAG is the key information needed to look up a LOCK item in the
 * lock hashtable.  A LOCKTAG value uniquely identifies a lockable object.
 *
 * The LockTagType enum defines the different kinds of objects we can lock.
 * We can handle up to 256 different LockTagTypes.
 */
typedef enum LockTagType
{
	LOCKTAG_RELATION,			/* whole relation */
	LOCKTAG_RELATION_EXTEND,	/* the right to extend a relation */
	LOCKTAG_PAGE,				/* one page of a relation */
	LOCKTAG_TUPLE,				/* one physical tuple */
	LOCKTAG_TRANSACTION,		/* transaction (for waiting for xact done) */
	LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
	LOCKTAG_SPECULATIVE_TOKEN,	/* speculative insertion Xid and token */
	LOCKTAG_OBJECT,				/* non-relation database object */
	LOCKTAG_USERLOCK,			/* reserved for old contrib/userlock code */
	LOCKTAG_ADVISORY			/* advisory user locks */
} LockTagType;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2008-01-09 00:18:51 +01:00
|
|
|
/* Alias for the final member of enum LockTagType */
#define LOCKTAG_LAST_TYPE	LOCKTAG_ADVISORY

/*
 * Table of lock tag type names, defined elsewhere.
 * NOTE(review): presumably indexed by LockTagType -- confirm at definition.
 */
extern const char *const LockTagTypeNames[];
|
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
|
|
|
|
* The LOCKTAG struct is defined with malice aforethought to fit into 16
|
|
|
|
* bytes with no padding. Note that this would need adjustment if we were
|
|
|
|
* to widen Oid, BlockNumber, or TransactionId to more than 32 bits.
|
|
|
|
*
|
|
|
|
* We include lockmethodid in the locktag so that a single hash table in
|
2005-12-09 02:22:04 +01:00
|
|
|
* shared memory can store locks of different lockmethods.
|
2005-04-30 00:28:24 +02:00
|
|
|
*/
|
|
|
|
typedef struct LOCKTAG
|
|
|
|
{
|
2005-10-15 04:49:52 +02:00
|
|
|
uint32 locktag_field1; /* a 32-bit ID field */
|
|
|
|
uint32 locktag_field2; /* a 32-bit ID field */
|
|
|
|
uint32 locktag_field3; /* a 32-bit ID field */
|
|
|
|
uint16 locktag_field4; /* a 16-bit ID field */
|
|
|
|
uint8 locktag_type; /* see enum LockTagType */
|
2005-04-30 00:28:24 +02:00
|
|
|
uint8 locktag_lockmethodid; /* lockmethod indicator */
|
2000-12-22 01:51:54 +01:00
|
|
|
} LOCKTAG;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
|
|
|
|
* These macros define how we map logical IDs of lockable objects into
|
2014-05-06 18:12:18 +02:00
|
|
|
* the physical fields of LOCKTAG. Use these to set up LOCKTAG values,
|
2005-04-30 00:28:24 +02:00
|
|
|
* rather than accessing the fields directly. Note multiple eval of target!
|
|
|
|
*/
|
2019-05-10 02:35:27 +02:00
|
|
|
|
|
|
|
/*
 * ID info for a relation is DB OID + REL OID; DB OID = 0 if shared.
 * Fields 3 and 4 are zeroed.  Note multiple eval of locktag!
 */
#define SET_LOCKTAG_RELATION(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_RELATION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * Relation-extension lock: same ID info as RELATION (DB OID + REL OID),
 * distinguished only by the tag type.  Note multiple eval of locktag!
 */
#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for a page is RELATION info + BlockNumber (stored in field 3).
 * Note multiple eval of locktag!
 */
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_PAGE, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for a tuple is PAGE info + OffsetNumber (stored in the 16-bit
 * field 4).  Note multiple eval of locktag!
 */
#define SET_LOCKTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = (offnum), \
	 (locktag).locktag_type = LOCKTAG_TUPLE, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for a transaction is its TransactionId (stored in field 1); the
 * remaining ID fields are zeroed.  Note multiple eval of locktag!
 */
#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
	((locktag).locktag_field1 = (xid), \
	 (locktag).locktag_field2 = 0, \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_TRANSACTION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for a virtual transaction is its VirtualTransactionId: backendId
 * goes into field 1 and localTransactionId into field 2.  vxid is a struct
 * value, not a pointer.  Note multiple eval of locktag and vxid!
 */
#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
	((locktag).locktag_field1 = (vxid).backendId, \
	 (locktag).locktag_field2 = (vxid).localTransactionId, \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for a speculative insert is TRANSACTION info +
 * its speculative insert counter (the token, stored in field 2).
 * Note multiple eval of locktag!
 */
#define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \
	((locktag).locktag_field1 = (xid), \
	 (locktag).locktag_field2 = (token), \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
|
|
|
|
2019-05-10 02:35:27 +02:00
|
|
|
/*
 * ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID
 *
 * Note: object ID has same representation as in pg_depend and
 * pg_description, but notice that we are constraining SUBID to 16 bits.
 * Also, we use DB OID = 0 for shared objects such as tablespaces.
 */
#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (classoid), \
	 (locktag).locktag_field3 = (objoid), \
	 (locktag).locktag_field4 = (objsubid), \
	 (locktag).locktag_type = LOCKTAG_OBJECT, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
2005-04-30 00:28:24 +02:00
|
|
|
|
2006-09-23 01:20:14 +02:00
|
|
|
/*
 * Advisory lock tag: the four caller-supplied values id1..id4 are stored
 * verbatim in fields 1..4 (id4 lands in the 16-bit field).  Unlike the
 * other SET_LOCKTAG macros, this one selects USER_LOCKMETHOD rather than
 * DEFAULT_LOCKMETHOD.  Note multiple eval of locktag!
 */
#define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \
	((locktag).locktag_field1 = (id1), \
	 (locktag).locktag_field2 = (id2), \
	 (locktag).locktag_field3 = (id3), \
	 (locktag).locktag_field4 = (id4), \
	 (locktag).locktag_type = LOCKTAG_ADVISORY, \
	 (locktag).locktag_lockmethodid = USER_LOCKMETHOD)
|
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
|
|
|
|
/*
|
2000-12-22 01:51:54 +01:00
|
|
|
* Per-locked-object lock information:
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
* tag -- uniquely identifies the object being locked
|
2001-01-16 07:11:34 +01:00
|
|
|
* grantMask -- bitmask for all lock types currently granted on this object.
|
|
|
|
* waitMask -- bitmask for all lock types currently awaited on this object.
|
2004-08-27 19:07:42 +02:00
|
|
|
* procLocks -- list of PROCLOCK objects for this lock.
|
2001-01-16 07:11:34 +01:00
|
|
|
* waitProcs -- queue of processes waiting for this lock.
|
|
|
|
* requested -- count of each lock type currently requested on the lock
|
|
|
|
* (includes requests already granted!!).
|
|
|
|
* nRequested -- total requested locks of all types.
|
|
|
|
* granted -- count of each lock type currently granted on the lock.
|
|
|
|
* nGranted -- total granted locks of all types.
|
2005-06-15 00:15:33 +02:00
|
|
|
*
|
|
|
|
* Note: these counts count 1 for each backend. Internally to a backend,
|
|
|
|
* there may be multiple grabs on a particular lock, but this is not reflected
|
|
|
|
* into shared memory.
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
1998-06-30 04:33:34 +02:00
|
|
|
typedef struct LOCK
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
|
|
|
/* hash key */
|
2001-01-16 07:11:34 +01:00
|
|
|
LOCKTAG tag; /* unique identifier of lockable object */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* data */
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
LOCKMASK grantMask; /* bitmask for lock types already granted */
|
|
|
|
LOCKMASK waitMask; /* bitmask for lock types awaited */
|
2005-10-15 04:49:52 +02:00
|
|
|
SHM_QUEUE procLocks; /* list of PROCLOCK objects assoc. with lock */
|
2002-06-11 15:40:53 +02:00
|
|
|
PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on lock */
|
Phase 2 of pgindent updates.
Change pg_bsd_indent to follow upstream rules for placement of comments
to the right of code, and remove pgindent hack that caused comments
following #endif to not obey the general rule.
Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using
the published version of pg_bsd_indent, but a hacked-up version that
tried to minimize the amount of movement of comments to the right of
code. The situation of interest is where such a comment has to be
moved to the right of its default placement at column 33 because there's
code there. BSD indent has always moved right in units of tab stops
in such cases --- but in the previous incarnation, indent was working
in 8-space tab stops, while now it knows we use 4-space tabs. So the
net result is that in about half the cases, such comments are placed
one tab stop left of before. This is better all around: it leaves
more room on the line for comment text, and it means that in such
cases the comment uniformly starts at the next 4-space tab stop after
the code, rather than sometimes one and sometimes two tabs after.
Also, ensure that comments following #endif are indented the same
as comments following other preprocessor commands such as #else.
That inconsistency turns out to have been self-inflicted damage
from a poorly-thought-through post-indent "fixup" in pgindent.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
|
|
|
int requested[MAX_LOCKMODES]; /* counts of requested locks */
|
2001-01-16 07:11:34 +01:00
|
|
|
int nRequested; /* total of requested[] array */
|
2001-03-22 05:01:46 +01:00
|
|
|
int granted[MAX_LOCKMODES]; /* counts of granted locks */
|
2001-01-16 07:11:34 +01:00
|
|
|
int nGranted; /* total of granted[] array */
|
1997-09-08 23:56:23 +02:00
|
|
|
} LOCK;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
 * Lock method ID stored in a LOCK's tag, cast to LOCKMETHODID.
 * The argument is the LOCK object itself (accessed with "."), not a pointer.
 */
#define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid)
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
|
|
|
|
/*
|
2005-06-15 00:15:33 +02:00
|
|
|
* We may have several different backends holding or awaiting locks
|
2004-08-26 19:22:28 +02:00
|
|
|
* on the same lockable object. We need to store some per-holder/waiter
|
|
|
|
* information for each such holder (or would-be holder). This is kept in
|
|
|
|
* a PROCLOCK struct.
|
2000-12-22 01:51:54 +01:00
|
|
|
*
|
2002-07-19 02:17:40 +02:00
|
|
|
* PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
|
2014-05-06 18:12:18 +02:00
|
|
|
* proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination
|
2006-07-24 01:08:46 +02:00
|
|
|
* of a lockable object and a holder/waiter for that object. (We can use
|
|
|
|
* pointers here because the PROCLOCKTAG need only be unique for the lifespan
|
|
|
|
* of the PROCLOCK, and it will never outlive the lock or the proc.)
|
2000-12-22 01:51:54 +01:00
|
|
|
*
|
2005-06-15 00:15:33 +02:00
|
|
|
* Internally to a backend, it is possible for the same lock to be held
|
|
|
|
* for different purposes: the backend tracks transaction locks separately
|
2014-05-06 18:12:18 +02:00
|
|
|
* from session locks. However, this is not reflected in the shared-memory
|
2005-06-15 00:15:33 +02:00
|
|
|
* state: we only track which backend(s) hold the lock. This is OK since a
|
|
|
|
* backend can never block itself.
|
2001-01-16 07:11:34 +01:00
|
|
|
*
|
2004-08-27 19:07:42 +02:00
|
|
|
* The holdMask field shows the already-granted locks represented by this
|
|
|
|
* proclock. Note that there will be a proclock object, possibly with
|
|
|
|
* zero holdMask, for any lock that the process is currently waiting on.
|
|
|
|
* Otherwise, proclock objects whose holdMasks are zero are recycled
|
2001-01-16 07:11:34 +01:00
|
|
|
* as soon as convenient.
|
2001-01-22 23:30:06 +01:00
|
|
|
*
|
2005-06-15 00:15:33 +02:00
|
|
|
* releaseMask is workspace for LockReleaseAll(): it shows the locks due
|
2014-05-06 18:12:18 +02:00
|
|
|
* to be released during the current call. This must only be examined or
|
2005-06-15 00:15:33 +02:00
|
|
|
* set by the backend owning the PROCLOCK.
|
|
|
|
*
|
2004-07-01 02:52:04 +02:00
|
|
|
* Each PROCLOCK object is linked into lists for both the associated LOCK
|
|
|
|
* object and the owning PGPROC object. Note that the PROCLOCK is entered
|
|
|
|
* into these lists as soon as it is created, even if no lock has yet been
|
|
|
|
* granted. A PGPROC that is waiting for a lock to be granted will also be
|
|
|
|
* linked into the lock's waitProcs queue.
|
2000-12-22 01:51:54 +01:00
|
|
|
*/
|
2002-07-19 02:17:40 +02:00
|
|
|
/*
 * Lookup key for the proclock hashtable: the pair (lockable object, owning
 * proc), both identified by pointer.
 */
typedef struct PROCLOCKTAG
|
2000-12-22 01:51:54 +01:00
|
|
|
{
|
2006-07-24 01:08:46 +02:00
|
|
|
/*
 * NB: we assume this struct contains no padding!
 * NOTE(review): presumably because the tag is hashed and compared as raw
 * bytes -- confirm against the hashtable setup in lock.c.
 */
|
|
|
|
LOCK *myLock; /* link to per-lockable-object information */
|
|
|
|
PGPROC *myProc; /* link to PGPROC of owning backend */
|
2002-07-19 02:17:40 +02:00
|
|
|
} PROCLOCKTAG;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2002-07-19 02:17:40 +02:00
|
|
|
/*
 * Per-holder/waiter state for one (LOCK, PGPROC) pair.
 *
 * holdMask records the lock modes already granted through this proclock and
 * releaseMask is scratch space for LockReleaseAll().  lockLink and procLink
 * are the list links that chain this object into the LOCK and PGPROC lists
 * of proclocks.
 */
typedef struct PROCLOCK
|
2000-12-22 01:51:54 +01:00
|
|
|
{
|
|
|
|
/* tag */
|
2003-02-18 03:13:24 +01:00
|
|
|
PROCLOCKTAG tag; /* unique identifier of proclock object */
|
2000-12-22 01:51:54 +01:00
|
|
|
|
|
|
|
/* data */
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
PGPROC *groupLeader; /* proc's lock group leader, or proc itself */
|
2004-08-27 19:07:42 +02:00
|
|
|
LOCKMASK holdMask; /* bitmask for lock types currently held */
|
2005-06-15 00:15:33 +02:00
|
|
|
LOCKMASK releaseMask; /* bitmask for lock types to be released */
|
|
|
|
SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */
|
|
|
|
SHM_QUEUE procLink; /* list link in PGPROC's list of proclocks */
|
2002-07-19 02:17:40 +02:00
|
|
|
} PROCLOCK;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2003-02-18 03:13:24 +01:00
|
|
|
/*
 * Lock method ID of the LOCK that a PROCLOCK belongs to: dereferences
 * tag.myLock and applies LOCK_LOCKMETHOD.  The argument is the PROCLOCK
 * object itself, not a pointer.
 */
#define PROCLOCK_LOCKMETHOD(proclock) \
|
2006-07-24 01:08:46 +02:00
|
|
|
LOCK_LOCKMETHOD(*((proclock).tag.myLock))
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2004-08-27 19:07:42 +02:00
|
|
|
/*
|
|
|
|
* Each backend also maintains a local hash table with information about each
|
2014-05-06 18:12:18 +02:00
|
|
|
* lock it is currently interested in. In particular the local table counts
|
2004-08-27 19:07:42 +02:00
|
|
|
* the number of times that lock has been acquired. This allows multiple
|
|
|
|
* requests for the same lock to be executed without additional accesses to
|
|
|
|
* shared memory. We also track the number of lock acquisitions per
|
|
|
|
* ResourceOwner, so that we can release just those locks belonging to a
|
|
|
|
* particular ResourceOwner.
|
2013-11-28 00:10:00 +01:00
|
|
|
*
|
|
|
|
* When holding a lock taken "normally", the lock and proclock fields always
|
|
|
|
* point to the associated objects in shared memory. However, if we acquired
|
|
|
|
* the lock via the fast-path mechanism, the lock and proclock fields are set
|
|
|
|
* to NULL, since there probably aren't any such objects in shared memory.
|
|
|
|
* (If the lock later gets promoted to normal representation, we may eventually
|
|
|
|
* update our locallock's lock/proclock fields after finding the shared
|
|
|
|
* objects.)
|
|
|
|
*
|
|
|
|
* Caution: a locallock object can be left over from a failed lock acquisition
|
|
|
|
* attempt. In this case its lock/proclock fields are untrustworthy, since
|
|
|
|
* the shared lock object is neither held nor awaited, and hence is available
|
|
|
|
* to be reclaimed. If nLocks > 0 then these pointers must either be valid or
|
|
|
|
* NULL, but when nLocks == 0 they should be considered garbage.
|
2004-08-27 19:07:42 +02:00
|
|
|
*/
|
|
|
|
/*
 * Key of an entry in the backend-local lock table: one entry exists per
 * combination of lockable object and lock mode.
 */
typedef struct LOCALLOCKTAG
|
|
|
|
{
|
|
|
|
LOCKTAG lock; /* identifies the lockable object */
|
|
|
|
LOCKMODE mode; /* lock mode for this table entry */
|
|
|
|
} LOCALLOCKTAG;
|
|
|
|
|
|
|
|
/*
 * Acquisition count for one owner of a local lock entry.  LOCALLOCK keeps a
 * resizable array of these (its lockOwners field) so that the locks belonging
 * to a single ResourceOwner can be released on their own.
 */
typedef struct LOCALLOCKOWNER
|
|
|
|
{
|
|
|
|
/*
|
2005-06-15 00:15:33 +02:00
|
|
|
* Note: if owner is NULL then the lock is held on behalf of the session;
|
|
|
|
* otherwise it is held on behalf of my current transaction.
|
|
|
|
*
|
|
|
|
* Must use a forward struct reference to avoid circularity.
|
2004-08-27 19:07:42 +02:00
|
|
|
*/
|
|
|
|
struct ResourceOwnerData *owner;
|
2008-09-16 03:56:26 +02:00
|
|
|
int64 nLocks; /* # of times held by this owner */
|
2004-08-27 19:07:42 +02:00
|
|
|
} LOCALLOCKOWNER;
|
|
|
|
|
|
|
|
/*
 * Backend-local bookkeeping for one (lockable object, lock mode) pair that
 * this backend is interested in.  It counts how often the lock is held,
 * in total and per owner.
 */
typedef struct LOCALLOCK
|
|
|
|
{
|
|
|
|
/* tag */
|
|
|
|
LOCALLOCKTAG tag; /* unique identifier of locallock entry */
|
|
|
|
|
|
|
|
/* data */
|
2019-03-19 14:07:08 +01:00
|
|
|
uint32 hashcode; /* copy of LOCKTAG's hash value */
|
2013-11-28 00:10:00 +01:00
|
|
|
LOCK *lock; /* associated LOCK object, if any; NULL for fast-path locks, garbage when nLocks == 0 */
|
|
|
|
PROCLOCK *proclock; /* associated PROCLOCK object, if any; same caveats as the lock field */
|
2008-09-16 03:56:26 +02:00
|
|
|
int64 nLocks; /* total number of times lock is held */
|
2004-08-27 19:07:42 +02:00
|
|
|
int numLockOwners; /* # of relevant ResourceOwners (entries used in lockOwners[]) */
|
|
|
|
int maxLockOwners; /* allocated size of lockOwners[] array */
|
2004-08-29 07:07:03 +02:00
|
|
|
LOCALLOCKOWNER *lockOwners; /* dynamically resizable array of per-owner counts */
|
2019-03-19 14:07:08 +01:00
|
|
|
bool holdsStrongLockCount; /* bumped FastPathStrongRelationLocks */
|
|
|
|
bool lockCleared; /* we read all sinval msgs for lock */
|
2004-08-27 19:07:42 +02:00
|
|
|
} LOCALLOCK;
|
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
 * Lock method ID recorded in a LOCALLOCK's tag.  The result is cast to
 * LOCKMETHODID so that it has the same type as the value produced by
 * LOCK_LOCKMETHOD, instead of the narrower type of the tag field.
 * The argument is the LOCALLOCK object itself, not a pointer.
 */
#define LOCALLOCK_LOCKMETHOD(llock) \
	((LOCKMETHODID) (llock).tag.lock.locktag_lockmethodid)
|
2004-08-27 19:07:42 +02:00
|
|
|
|
|
|
|
|
2002-08-17 15:04:19 +02:00
|
|
|
/*
|
2011-05-29 01:52:00 +02:00
|
|
|
* These structures hold information passed from lmgr internals to the
|
|
|
|
* user-level lock-listing functions (in lockfuncs.c).
|
2002-08-17 15:04:19 +02:00
|
|
|
*/
|
2011-05-29 01:52:00 +02:00
|
|
|
|
|
|
|
/*
 * One reported lock entry: the lock tag plus the modes held and awaited by a
 * single PGPROC, together with identifying information about that PGPROC.
 * Arrays of these are carried by LockData and BlockedProcsData.
 */
typedef struct LockInstanceData
|
|
|
|
{
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
LOCKTAG locktag; /* tag for locked object */
|
2011-05-29 01:52:00 +02:00
|
|
|
LOCKMASK holdMask; /* locks held by this PGPROC */
|
|
|
|
LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */
|
|
|
|
BackendId backend; /* backend ID of this PGPROC */
|
2012-06-10 21:20:04 +02:00
|
|
|
LocalTransactionId lxid; /* local transaction ID of this PGPROC */
|
2011-05-29 01:52:00 +02:00
|
|
|
int pid; /* pid of this PGPROC */
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
int leaderPid; /* pid of group leader; = pid if no group */
|
2011-05-29 01:52:00 +02:00
|
|
|
bool fastpath; /* taken via fastpath? */
|
|
|
|
} LockInstanceData;
|
|
|
|
|
2006-07-24 01:08:46 +02:00
|
|
|
/*
 * A counted array of LockInstanceData entries: nelements gives the length of
 * the locks[] array.
 */
typedef struct LockData
|
2002-08-17 15:04:19 +02:00
|
|
|
{
|
2011-05-29 01:52:00 +02:00
|
|
|
int nelements; /* The length of the array */
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
LockInstanceData *locks; /* Array of per-PROCLOCK information */
|
2002-08-17 15:04:19 +02:00
|
|
|
} LockData;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
/*
 * Describes one blocked process.  The struct holds no lock data itself; it
 * stores (first index, count) ranges into the locks[] and waiter_pids[]
 * arrays of the enclosing BlockedProcsData.
 */
typedef struct BlockedProcData
|
|
|
|
{
|
|
|
|
int pid; /* pid of a blocked PGPROC */
|
|
|
|
/* Per-PROCLOCK information about PROCLOCKs of the lock the pid awaits */
|
|
|
|
/* (these fields refer to indexes in BlockedProcsData.locks[]) */
|
|
|
|
int first_lock; /* index of first relevant LockInstanceData */
|
|
|
|
int num_locks; /* number of relevant LockInstanceDatas */
|
|
|
|
/* PIDs of PGPROCs that are ahead of "pid" in the lock's wait queue */
|
|
|
|
/* (these fields refer to indexes in BlockedProcsData.waiter_pids[]) */
|
|
|
|
int first_waiter; /* index of first preceding waiter */
|
|
|
|
int num_waiters; /* number of preceding waiters */
|
|
|
|
} BlockedProcData;
|
|
|
|
|
|
|
|
/*
 * Container for blocked-process information: three arrays (procs, locks,
 * waiter_pids), each tracked with a count of valid entries and an allocated
 * length.  Entries of procs[] index into the other two arrays.
 */
typedef struct BlockedProcsData
|
|
|
|
{
|
|
|
|
BlockedProcData *procs; /* Array of per-blocked-proc information */
|
|
|
|
LockInstanceData *locks; /* Array of per-PROCLOCK information */
|
|
|
|
int *waiter_pids; /* Array of PIDs of other blocked PGPROCs */
|
|
|
|
int nprocs; /* # of valid entries in procs[] array */
|
|
|
|
int maxprocs; /* Allocated length of procs[] array */
|
|
|
|
int nlocks; /* # of valid entries in locks[] array */
|
|
|
|
int maxlocks; /* Allocated length of locks[] array */
|
|
|
|
int npids; /* # of valid entries in waiter_pids[] array */
|
|
|
|
int maxpids; /* Allocated length of waiter_pids[] array */
|
|
|
|
} BlockedProcsData;
|
|
|
|
|
2004-05-28 07:13:32 +02:00
|
|
|
|
2005-05-30 00:45:02 +02:00
|
|
|
/* Result codes for LockAcquire() */
|
|
|
|
/*
 * Outcome of a lock acquisition request.
 *
 * Both ALREADY_HELD and ALREADY_CLEAR mean the lock was held before the call
 * and only its count was incremented.
 * NOTE(review): ALREADY_CLEAR appears to correspond to the LOCALLOCK
 * lockCleared flag (all sinval messages for the lock have been read) --
 * confirm against LockAcquireExtended in lock.c.
 */
typedef enum
|
|
|
|
{
|
|
|
|
LOCKACQUIRE_NOT_AVAIL, /* lock not available, and dontWait=true */
|
|
|
|
LOCKACQUIRE_OK, /* lock successfully acquired */
|
Fix longstanding recursion hazard in sinval message processing.
LockRelationOid and sibling routines supposed that, if our session already
holds the lock they were asked to acquire, they could skip calling
AcceptInvalidationMessages on the grounds that we must have already read
any remote sinval messages issued against the relation being locked.
This is normally true, but there's a critical special case where it's not:
processing inside AcceptInvalidationMessages might attempt to access system
relations, resulting in a recursive call to acquire a relation lock.
Hence, if the outer call had acquired that same system catalog lock, we'd
fall through, despite the possibility that there's an as-yet-unread sinval
message for that system catalog. This could, for example, result in
failure to access a system catalog or index that had just been processed
by VACUUM FULL. This is the explanation for buildfarm failures we've been
seeing intermittently for the past three months. The bug is far older
than that, but commits a54e1f158 et al added a new recursion case within
AcceptInvalidationMessages that is apparently easier to hit than any
previous case.
To fix this, we must not skip calling AcceptInvalidationMessages until
we have *finished* a call to it since acquiring a relation lock, not
merely acquired the lock. (There's already adequate logic inside
AcceptInvalidationMessages to deal with being called recursively.)
Fortunately, we can implement that at trivial cost, by adding a flag
to LOCALLOCK hashtable entries that tracks whether we know we have
completed such a call.
There is an API hazard added by this patch for external callers of
LockAcquire: if anything is testing for LOCKACQUIRE_ALREADY_HELD,
it might be fooled by the new return code LOCKACQUIRE_ALREADY_CLEAR
into thinking the lock wasn't already held. This should be a fail-soft
condition, though, unless something very bizarre is being done in
response to the test.
Also, I added an additional output argument to LockAcquireExtended,
assuming that that probably isn't called by any outside code given
the very limited usefulness of its additional functionality.
Back-patch to all supported branches.
Discussion: https://postgr.es/m/12259.1532117714@sss.pgh.pa.us
2018-09-08 00:04:37 +02:00
|
|
|
LOCKACQUIRE_ALREADY_HELD, /* incremented count for lock already held */
|
|
|
|
LOCKACQUIRE_ALREADY_CLEAR /* incremented count for lock already clear */
|
2005-05-30 00:45:02 +02:00
|
|
|
} LockAcquireResult;
|
|
|
|
|
2007-05-30 18:16:32 +02:00
|
|
|
/* Deadlock states identified by DeadLockCheck() */
|
2007-03-03 19:46:40 +01:00
|
|
|
/*
 * Result of a deadlock check.  DS_NOT_YET_CHECKED is the first enumerator
 * and marks the state before any check has run.
 */
typedef enum
|
|
|
|
{
|
2007-06-19 22:13:22 +02:00
|
|
|
DS_NOT_YET_CHECKED, /* no deadlock check has run yet */
|
|
|
|
DS_NO_DEADLOCK, /* no deadlock detected */
|
|
|
|
DS_SOFT_DEADLOCK, /* deadlock avoided by queue rearrangement */
|
2007-10-26 22:45:10 +02:00
|
|
|
DS_HARD_DEADLOCK, /* deadlock, no way out but ERROR */
|
|
|
|
DS_BLOCKED_BY_AUTOVACUUM /* no deadlock; queue blocked by autovacuum
|
2007-11-15 22:14:46 +01:00
|
|
|
* worker */
|
2007-11-15 23:25:18 +01:00
|
|
|
} DeadLockState;
|
2007-06-19 22:13:22 +02:00
|
|
|
|
2006-07-24 01:08:46 +02:00
|
|
|
/*
|
|
|
|
* The lockmgr's shared hash tables are partitioned to reduce contention.
|
|
|
|
* To determine which partition a given locktag belongs to, compute the tag's
|
|
|
|
* hash code with LockTagHashCode(), then apply one of these macros.
|
|
|
|
* NB: NUM_LOCK_PARTITIONS must be a power of 2!
|
|
|
|
*/
|
|
|
|
/* Partition number for a lock tag hash code: the code modulo NUM_LOCK_PARTITIONS. */
#define LockHashPartition(hashcode) \
|
|
|
|
((hashcode) % NUM_LOCK_PARTITIONS)
|
|
|
|
/*
 * Address of the LWLock guarding the partition that a hash code maps to:
 * the entry of MainLWLockArray at LOCK_MANAGER_LWLOCK_OFFSET plus the
 * partition number.
 */
#define LockHashPartitionLock(hashcode) \
|
2014-01-27 17:07:44 +01:00
|
|
|
(&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \
|
|
|
|
LockHashPartition(hashcode)].lock)
|
|
|
|
/*
 * Address of the partition LWLock for partition number i.  The index is used
 * as given; no modulo is applied here, unlike LockHashPartitionLock.
 */
#define LockHashPartitionLockByIndex(i) \
|
|
|
|
(&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
|
2006-07-24 01:08:46 +02:00
|
|
|
|
2016-02-07 16:16:13 +01:00
|
|
|
/*
|
|
|
|
* The deadlock detector needs to be able to access lockGroupLeader and
|
|
|
|
* related fields in the PGPROC, so we arrange for those fields to be protected
|
|
|
|
* by one of the lock hash partition locks. Since the deadlock detector
|
|
|
|
* acquires all such locks anyway, this makes it safe for it to access these
|
|
|
|
* fields without doing anything extra. To avoid contention as much as
|
2016-02-21 11:12:02 +01:00
|
|
|
* possible, we map different PGPROCs to different partition locks. The lock
|
|
|
|
* used for a given lock group is determined by the group leader's pgprocno.
|
2016-02-07 16:16:13 +01:00
|
|
|
*/
|
2016-02-21 11:12:02 +01:00
|
|
|
/*
 * Partition LWLock used for a lock group: the leader's pgprocno is passed to
 * LockHashPartitionLock in place of a hash code.  The argument is a pointer
 * to the group leader's PGPROC.
 */
#define LockHashPartitionLockByProc(leader_pgproc) \
|
|
|
|
LockHashPartitionLock((leader_pgproc)->pgprocno)
|
2016-02-07 16:16:13 +01:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
/*
|
|
|
|
* function prototypes
|
|
|
|
*/
|
1997-09-08 04:41:22 +02:00
|
|
|
extern void InitLocks(void);
|
2005-12-09 02:22:04 +01:00
|
|
|
extern LockMethod GetLocksMethodTable(const LOCK *lock);
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
extern LockMethod GetLockTagsMethodTable(const LOCKTAG *locktag);
|
2006-07-24 01:08:46 +02:00
|
|
|
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
|
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
2013-01-23 16:04:59 +01:00
|
|
|
extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2);
|
2005-12-09 02:22:04 +01:00
|
|
|
/*
 * Lock acquisition entry point: takes the lock tag, the requested lock mode
 * and two boolean options, and returns a LockAcquireResult code.  Per the
 * commit notes for the sinval recursion fix, the possible codes include
 * LOCKACQUIRE_ALREADY_HELD and LOCKACQUIRE_ALREADY_CLEAR.
 * NOTE(review): sessionLock presumably requests a session-level hold rather
 * than a transactional one, and dontWait presumably makes the call return
 * instead of blocking -- confirm both against the definition.
 */
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode,
|
|
|
|
bool sessionLock,
|
|
|
|
bool dontWait);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/*
 * Variant of LockAcquire that takes two further arguments and returns the
 * same LockAcquireResult type.  locallockp is an output argument (it was
 * added by the sinval recursion fix, per that commit's notes).
 * NOTE(review): reportMemoryError presumably selects whether running out of
 * lock-table memory is reported as an error or signalled through the return
 * value; what is stored through locallockp is not visible here -- confirm
 * both against the definition.
 */
extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode,
|
|
|
|
bool sessionLock,
|
|
|
|
bool dontWait,
|
|
|
|
bool reportMemoryError,
|
|
|
|
LOCALLOCK **locallockp);
|
2012-04-18 17:17:30 +02:00
|
|
|
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): by its name, presumably backs out a strong-lock acquisition
 * that was left in progress -- confirm against the definition.
 */
extern void AbortStrongLockAcquire(void);
|
Fix longstanding recursion hazard in sinval message processing.
LockRelationOid and sibling routines supposed that, if our session already
holds the lock they were asked to acquire, they could skip calling
AcceptInvalidationMessages on the grounds that we must have already read
any remote sinval messages issued against the relation being locked.
This is normally true, but there's a critical special case where it's not:
processing inside AcceptInvalidationMessages might attempt to access system
relations, resulting in a recursive call to acquire a relation lock.
Hence, if the outer call had acquired that same system catalog lock, we'd
fall through, despite the possibility that there's an as-yet-unread sinval
message for that system catalog. This could, for example, result in
failure to access a system catalog or index that had just been processed
by VACUUM FULL. This is the explanation for buildfarm failures we've been
seeing intermittently for the past three months. The bug is far older
than that, but commits a54e1f158 et al added a new recursion case within
AcceptInvalidationMessages that is apparently easier to hit than any
previous case.
To fix this, we must not skip calling AcceptInvalidationMessages until
we have *finished* a call to it since acquiring a relation lock, not
merely acquired the lock. (There's already adequate logic inside
AcceptInvalidationMessages to deal with being called recursively.)
Fortunately, we can implement that at trivial cost, by adding a flag
to LOCALLOCK hashtable entries that tracks whether we know we have
completed such a call.
There is an API hazard added by this patch for external callers of
LockAcquire: if anything is testing for LOCKACQUIRE_ALREADY_HELD,
it might be fooled by the new return code LOCKACQUIRE_ALREADY_CLEAR
into thinking the lock wasn't already held. This should be a fail-soft
condition, though, unless something very bizarre is being done in
response to the test.
Also, I added an additional output argument to LockAcquireExtended,
assuming that that probably isn't called by any outside code given
the very limited usefulness of its additional functionality.
Back-patch to all supported branches.
Discussion: https://postgr.es/m/12259.1532117714@sss.pgh.pa.us
2018-09-08 00:04:37 +02:00
|
|
|
/*
 * Operates on a single LOCALLOCK entry.
 * NOTE(review): presumably sets the LOCALLOCK flag introduced by the sinval
 * recursion fix, i.e. records that a call to AcceptInvalidationMessages has
 * completed since the lock was acquired -- confirm against the definition.
 */
extern void MarkLockClear(LOCALLOCK *locallock);
|
2005-12-09 02:22:04 +01:00
|
|
|
/*
 * Takes the same locktag / lockmode / sessionLock triple that LockAcquire
 * accepts and returns a bool.
 * NOTE(review): presumably releases one hold of the given kind and reports
 * whether such a hold existed -- confirm against the definition.
 */
extern bool LockRelease(const LOCKTAG *locktag,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode, bool sessionLock);
|
2005-06-15 00:15:33 +02:00
|
|
|
/*
 * Bulk release for one lock method.
 * NOTE(review): allLocks presumably widens the release beyond the
 * transaction-level holds (e.g. to session holds as well) -- confirm
 * against the definition.
 */
extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
|
Overdue code review for transaction-level advisory locks patch.
Commit 62c7bd31c8878dd45c9b9b2429ab7a12103f3590 had assorted problems, most
visibly that it broke PREPARE TRANSACTION in the presence of session-level
advisory locks (which should be ignored by PREPARE), as per a recent
complaint from Stephen Rees. More abstractly, the patch made the
LockMethodData.transactional flag not merely useless but outright
dangerous, because in point of fact that flag no longer tells you anything
at all about whether a lock is held transactionally. This fix therefore
removes that flag altogether. We now rely entirely on the convention
already in use in lock.c that transactional lock holds must be owned by
some ResourceOwner, while session holds are never so owned. Setting the
locallock struct's owner link to NULL thus denotes a session hold, and
there is no redundant marker for that.
PREPARE TRANSACTION now works again when there are session-level advisory
locks, and it is also able to transfer transactional advisory locks to the
prepared transaction, but for implementation reasons it throws an error if
we hold both types of lock on a single lockable object. Perhaps it will be
worth improving that someday.
Assorted other minor cleanup and documentation editing, as well.
Back-patch to 9.1, except that in the 9.1 branch I did not remove the
LockMethodData.transactional flag for fear of causing an ABI break for
any external code that might be examining those structs.
2012-05-04 23:43:27 +02:00
|
|
|
/*
 * Takes a lock method id and returns nothing.
 * NOTE(review): presumably releases the session-level holds (those not owned
 * by any ResourceOwner) of the given lock method -- confirm against the
 * definition.
 */
extern void LockReleaseSession(LOCKMETHODID lockmethodid);
|
2012-06-21 14:01:17 +02:00
|
|
|
/*
 * Two routines with identical signatures: each takes an array of LOCALLOCK
 * pointers plus a count.
 * NOTE(review): nlocks is presumably the number of entries in locallocks;
 * by their names these release, respectively reassign, the locks held by
 * the current owner -- confirm against the definitions.
 */
extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
|
|
|
|
extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);
|
2018-10-01 18:43:21 +02:00
|
|
|
/*
 * Takes a lock tag and a lock mode and returns a bool.
 * NOTE(review): presumably true when the calling backend already holds the
 * given lock in the given mode -- confirm against the definition.
 */
extern bool LockHeldByMe(const LOCKTAG *locktag, LOCKMODE lockmode);
|
Fix performance problems with autovacuum truncation in busy workloads.
In situations where there are over 8MB of empty pages at the end of
a table, the truncation work for trailing empty pages takes longer
than deadlock_timeout, and there is frequent access to the table by
processes other than autovacuum, there was a problem with the
autovacuum worker process being canceled by the deadlock checking
code. The truncation work done by autovacuum up to that point was
lost, and the attempt tried again by a later autovacuum worker. The
attempts could continue indefinitely without making progress,
consuming resources and blocking other processes for up to
deadlock_timeout each time.
This patch has the autovacuum worker checking whether it is
blocking any other thread at 20ms intervals. If such a condition
develops, the autovacuum worker will persist the work it has done
so far, release its lock on the table, and sleep in 50ms intervals
for up to 5 seconds, hoping to be able to re-acquire the lock and
try again. If it is unable to get the lock in that time, it moves
on and a worker will try to continue later from the point this one
left off.
While this patch doesn't change the rules about when and what to
truncate, it does cause the truncation to occur sooner, with less
blocking, and with the consumption of fewer resources when there is
contention for the table's lock.
The only user-visible change other than improved performance is
that the table size during truncation may change incrementally
instead of just once.
This problem exists in all supported versions but is infrequently
reported, although some reports of performance problems when
autovacuum runs might be caused by this. Initial commit is just the
master branch, but this should probably be backpatched once the
build farm and general developer usage confirm that there are no
surprising effects.
Jan Wieck
2012-12-11 21:33:08 +01:00
|
|
|
/*
 * Takes the same locktag / lockmode / sessionLock triple as LockRelease and
 * returns a bool.  Blamed to the autovacuum-truncation commit, whose notes
 * describe the worker periodically checking whether it is blocking anyone.
 * NOTE(review): presumably true when some other process is waiting on the
 * identified lock -- confirm against the definition.
 */
extern bool LockHasWaiters(const LOCKTAG *locktag,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode, bool sessionLock);
|
2007-09-05 20:10:48 +02:00
|
|
|
/*
 * Returns a pointer to VirtualTransactionId for the given lock tag and mode.
 * NOTE(review): presumably an array of the virtual transactions holding
 * conflicting locks, with its length stored through countp; array
 * termination and ownership are not visible here -- confirm against the
 * definition.
 */
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode, int *countp);
|
2005-06-18 00:32:51 +02:00
|
|
|
/*
 * Lock-manager hooks for PREPARE TRANSACTION.
 * NOTE(review): by their names these run at, respectively after, the
 * prepare step; xid is presumably the transaction id of the prepared
 * transaction that takes over the locks -- confirm against the definitions.
 */
extern void AtPrepare_Locks(void);
|
|
|
|
extern void PostPrepare_Locks(TransactionId xid);
|
2019-12-29 09:09:20 +01:00
|
|
|
/*
 * Takes a lock method table, a requested mode, a LOCK and a PROCLOCK, and
 * returns a bool.
 * NOTE(review): presumably reports whether granting lockmode on *lock to the
 * holder described by *proclock would conflict with existing holders; the
 * sense of the return value must be confirmed against the definition.
 */
extern bool LockCheckConflicts(LockMethod lockMethodTable,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode,
|
|
|
|
LOCK *lock, PROCLOCK *proclock);
|
2003-02-18 03:13:24 +01:00
|
|
|
/*
 * Takes a LOCK, a PROCLOCK and a lock mode; returns nothing.
 * NOTE(review): presumably updates *lock and *proclock to record that
 * lockmode has been granted -- confirm against the definition.
 */
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
|
2004-08-27 19:07:42 +02:00
|
|
|
/*
 * Takes no arguments, so it must operate on state kept elsewhere.
 * NOTE(review): by its name, presumably finishes granting the lock the
 * caller had been waiting for -- confirm against the definition.
 */
extern void GrantAwaitedLock(void);
|
2006-07-24 01:08:46 +02:00
|
|
|
/*
 * Takes a PGPROC and a 32-bit hash code; returns nothing.
 * NOTE(review): presumably removes proc from the wait queue of the lock it
 * is waiting for, with hashcode being that lock's hash value -- confirm
 * against the definition.
 */
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
|
2005-08-21 01:26:37 +02:00
|
|
|
/*
 * Takes no arguments and returns a Size.
 * NOTE(review): presumably the amount of shared memory the lock manager
 * needs -- confirm against the definition.
 */
extern Size LockShmemSize(void);
|
2002-08-31 19:14:28 +02:00
|
|
|
/*
 * Takes no arguments and returns a pointer to a LockData.
 * NOTE(review): presumably a snapshot of lock-table state for status
 * reporting; who frees the result is not visible here -- confirm against
 * the definition.
 */
extern LockData *GetLockStatusData(void);
|
Create a function to reliably identify which sessions block which others.
This patch introduces "pg_blocking_pids(int) returns int[]", which returns
the PIDs of any sessions that are blocking the session with the given PID.
Historically people have obtained such information using a self-join on
the pg_locks view, but it's unreasonably tedious to do it that way with any
modicum of correctness, and the addition of parallel queries has pretty
much broken that approach altogether. (Given some more columns in the view
than there are today, you could imagine handling parallel-query cases with
a 4-way join; but ugh.)
The new function has the following behaviors that are painful or impossible
to get right via pg_locks:
1. Correctly understands which lock modes block which other ones.
2. In soft-block situations (two processes both waiting for conflicting lock
modes), only the one that's in front in the wait queue is reported to
block the other.
3. In parallel-query cases, reports all sessions blocking any member of
the given PID's lock group, and reports a session by naming its leader
process's PID, which will be the pg_backend_pid() value visible to
clients.
The motivation for doing this right now is mostly to fix the isolation
tests. Commit 38f8bdcac4982215beb9f65a19debecaf22fd470 lobotomized
isolationtester's is-it-waiting query by removing its ability to recognize
nonconflicting lock modes, as a crude workaround for the inability to
handle soft-block situations properly. But even without the lock mode
tests, the old query was excessively slow, particularly in
CLOBBER_CACHE_ALWAYS builds; some of our buildfarm animals fail the new
deadlock-hard test because the deadlock timeout elapses before they can
probe the waiting status of all eight sessions. Replacing the pg_locks
self-join with use of pg_blocking_pids() is not only much more correct, but
a lot faster: I measure it at about 9X faster in a typical dev build with
Asserts, and 3X faster in CLOBBER_CACHE_ALWAYS builds. That should provide
enough headroom for the slower CLOBBER_CACHE_ALWAYS animals to pass the
test, without having to lengthen deadlock_timeout yet more and thus slow
down the test for everyone else.
2016-02-22 20:31:43 +01:00
|
|
|
/*
 * Takes a PID and returns a pointer to a BlockedProcsData.  Blamed to the
 * commit that introduced pg_blocking_pids(int), which reports the PIDs of
 * sessions blocking the session with the given PID.
 * NOTE(review): presumably the data source for that function, with
 * blocked_pid being the PID of the blocked session -- confirm against the
 * definition.
 */
extern BlockedProcsData *GetBlockerStatusData(int blocked_pid);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
|
|
|
|
/*
 * Returns a pointer to xl_standby_lock and takes an int pointer.  Blamed to
 * the Hot Standby commit.
 * NOTE(review): presumably returns an array of the locks held by running
 * transactions and stores its length through nlocks -- confirm against the
 * definition.
 */
extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks);
|
2005-12-09 02:22:04 +01:00
|
|
|
/*
 * Takes a lock method id and a lock mode; returns a const string that the
 * caller must not modify.
 * NOTE(review): presumably the printable name of the mode within the given
 * lock method -- confirm against the definition.
 */
extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode);
|
1998-02-26 05:46:47 +01:00
|
|
|
|
2005-06-18 00:32:51 +02:00
|
|
|
/*
 * Two-phase-commit routine for lock records, taking a transaction id, a
 * 16-bit info word and an opaque data pointer with its length.
 * NOTE(review): by its name, presumably invoked when a prepared
 * transaction's lock records are recovered; recdata is presumably len bytes
 * of record payload -- confirm against the definition.
 */
extern void lock_twophase_recover(TransactionId xid, uint16 info,
|
2019-05-22 19:04:48 +02:00
|
|
|
void *recdata, uint32 len);
|
2005-06-18 00:32:51 +02:00
|
|
|
/*
 * Two-phase-commit routine for lock records, taking a transaction id, a
 * 16-bit info word and an opaque data pointer with its length.
 * NOTE(review): by its name, presumably invoked after a prepared
 * transaction commits -- confirm against the definition.
 */
extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
|
2019-05-22 19:04:48 +02:00
|
|
|
void *recdata, uint32 len);
|
2005-06-18 00:32:51 +02:00
|
|
|
/*
 * Two-phase-commit routine for lock records, taking a transaction id, a
 * 16-bit info word and an opaque data pointer with its length.
 * NOTE(review): by its name, presumably invoked after a prepared
 * transaction is rolled back -- confirm against the definition.
 */
extern void lock_twophase_postabort(TransactionId xid, uint16 info,
|
2019-05-22 19:04:48 +02:00
|
|
|
void *recdata, uint32 len);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
/*
 * Two-phase-commit routine for lock records, taking a transaction id, a
 * 16-bit info word and an opaque data pointer with its length.  Blamed to
 * the Hot Standby commit.
 * NOTE(review): by its name, presumably the recovery routine used while
 * running as a standby -- confirm against the definition.
 */
extern void lock_twophase_standby_recover(TransactionId xid, uint16 info,
|
2019-05-22 19:04:48 +02:00
|
|
|
void *recdata, uint32 len);
|
2005-06-18 00:32:51 +02:00
|
|
|
|
2007-06-19 22:13:22 +02:00
|
|
|
/*
 * Takes a PGPROC and returns a DeadLockState value.
 * NOTE(review): presumably runs deadlock detection starting from proc and
 * reports the outcome -- confirm against the definition.
 */
extern DeadLockState DeadLockCheck(PGPROC *proc);
|
2007-10-26 22:45:10 +02:00
|
|
|
/*
 * Takes no arguments and returns a PGPROC pointer.
 * NOTE(review): by its name, presumably the PGPROC of an autovacuum process
 * found to be blocking the caller; whether it can return NULL is not
 * visible here -- confirm against the definition.
 */
extern PGPROC *GetBlockingAutoVacuumPgproc(void);
|
2015-03-26 19:03:19 +01:00
|
|
|
/*
 * Takes no arguments.  Declared with pg_attribute_noreturn(), so control
 * never comes back to the caller.
 * NOTE(review): presumably reports a detected deadlock by raising an error
 * -- confirm against the definition.
 */
extern void DeadLockReport(void) pg_attribute_noreturn();
|
2006-08-27 21:14:34 +02:00
|
|
|
/*
 * Takes two PGPROCs, a lock mode and a LOCK; returns nothing.
 * NOTE(review): presumably records the details of a deadlock between proc1
 * and proc2 over *lock so that it can be reported later -- confirm against
 * the definition.
 */
extern void RememberSimpleDeadLock(PGPROC *proc1,
|
2019-05-22 19:04:48 +02:00
|
|
|
LOCKMODE lockmode,
|
|
|
|
LOCK *lock,
|
|
|
|
PGPROC *proc2);
|
2006-08-27 21:14:34 +02:00
|
|
|
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably one-time setup of the working state used by the
 * deadlock checker -- confirm against the definition.
 */
extern void InitDeadLockChecking(void);
|
|
|
|
|
2016-04-08 08:04:46 +02:00
|
|
|
/*
 * Takes a lock tag and returns an int.
 * NOTE(review): presumably the number of processes currently waiting on the
 * identified lock -- confirm against the definition.
 */
extern int LockWaiterCount(const LOCKTAG *locktag);
|
|
|
|
|
2000-05-31 02:28:42 +02:00
|
|
|
/*
 * Debugging aids, declared only when LOCK_DEBUG is defined.
 * NOTE(review): by their names these dump the locks of one PGPROC,
 * respectively all locks; the output destination is not visible here --
 * confirm against the definitions.
 */
#ifdef LOCK_DEBUG
|
2005-06-18 00:32:51 +02:00
|
|
|
extern void DumpLocks(PGPROC *proc);
|
1998-08-25 23:20:32 +02:00
|
|
|
extern void DumpAllLocks(void);
|
1997-02-12 06:25:13 +01:00
|
|
|
#endif
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2011-08-04 18:38:33 +02:00
|
|
|
/*
 * Lock a VXID (used to wait for a transaction to finish).
 *
 * Three entry points operate on virtual transaction ids: an insert, a
 * cleanup that takes no arguments, and VirtualXactLock, which returns a bool.
 * NOTE(review): the wait flag presumably selects between blocking until the
 * lock is available and returning immediately; the meaning of the bool
 * result must be confirmed against the definition.
 */
|
|
|
|
extern void VirtualXactLockTableInsert(VirtualTransactionId vxid);
|
2012-11-29 22:59:11 +01:00
|
|
|
extern void VirtualXactLockTableCleanup(void);
|
2011-08-04 18:38:33 +02:00
|
|
|
extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait);
|
|
|
|
|
2019-07-29 05:28:30 +02:00
|
|
|
#endif /* LOCK_H_ */
|