1996-08-28 03:59:28 +02:00
|
|
|
/*-------------------------------------------------------------------------
 *
 * lock.h
 *	  POSTGRES low-level lock mechanism
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/lock.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef LOCK_H_
|
|
|
|
#define LOCK_H_
|
|
|
|
|
2007-09-05 20:10:48 +02:00
|
|
|
#include "storage/backendid.h"
|
2001-09-29 06:02:27 +02:00
|
|
|
#include "storage/lwlock.h"
|
1999-07-16 19:07:40 +02:00
|
|
|
#include "storage/shmem.h"
|
1996-08-28 03:59:28 +02:00
|
|
|
|
|
|
|
|
2006-07-23 05:07:58 +02:00
|
|
|
/* struct PGPROC is declared in proc.h, but must forward-reference it */
|
|
|
|
typedef struct PGPROC PGPROC;
|
2005-12-11 22:02:18 +01:00
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
typedef struct PROC_QUEUE
|
|
|
|
{
|
2002-06-11 15:40:53 +02:00
|
|
|
SHM_QUEUE links; /* head of list of PGPROC objects */
|
2001-01-22 23:30:06 +01:00
|
|
|
int size; /* number of entries in list */
|
2000-12-22 01:51:54 +01:00
|
|
|
} PROC_QUEUE;
|
|
|
|
|
2004-08-26 19:22:28 +02:00
|
|
|
/* GUC variables */
extern int	max_locks_per_xact;

#ifdef LOCK_DEBUG
/* Trace/debug knobs, compiled in only for LOCK_DEBUG builds */
extern int	Trace_lock_oidmin;
extern bool Trace_locks;
extern bool Trace_userlocks;
extern int	Trace_lock_table;
extern bool Debug_deadlocks;
#endif   /* LOCK_DEBUG */
|
2000-05-31 02:28:42 +02:00
|
|
|
|
|
|
|
|
2007-09-05 20:10:48 +02:00
|
|
|
/*
|
|
|
|
* Top-level transactions are identified by VirtualTransactionIDs comprising
|
|
|
|
* the BackendId of the backend running the xact, plus a locally-assigned
|
2014-05-06 18:12:18 +02:00
|
|
|
* LocalTransactionId. These are guaranteed unique over the short term,
|
2007-09-05 20:10:48 +02:00
|
|
|
* but will be reused after a database restart; hence they should never
|
|
|
|
* be stored on disk.
|
|
|
|
*
|
|
|
|
* Note that struct VirtualTransactionId can not be assumed to be atomically
|
|
|
|
* assignable as a whole. However, type LocalTransactionId is assumed to
|
|
|
|
* be atomically assignable, and the backend ID doesn't change often enough
|
|
|
|
* to be a problem, so we can fetch or assign the two fields separately.
|
|
|
|
* We deliberately refrain from using the struct within PGPROC, to prevent
|
|
|
|
* coding errors from trying to use struct assignment with it; instead use
|
|
|
|
* GET_VXID_FROM_PGPROC().
|
|
|
|
*/
|
|
|
|
typedef struct
|
|
|
|
{
|
|
|
|
BackendId backendId; /* determined at backend startup */
|
2007-11-15 22:14:46 +01:00
|
|
|
LocalTransactionId localTransactionId; /* backend-local transaction
|
|
|
|
* id */
|
2007-11-15 23:25:18 +01:00
|
|
|
} VirtualTransactionId;
|
2007-09-05 20:10:48 +02:00
|
|
|
|
|
|
|
#define InvalidLocalTransactionId		0
#define LocalTransactionIdIsValid(lxid) ((lxid) != InvalidLocalTransactionId)

/* A VXID is valid only when both of its component fields are valid */
#define VirtualTransactionIdIsValid(vxid) \
	(((vxid).backendId != InvalidBackendId) && \
	 LocalTransactionIdIsValid((vxid).localTransactionId))
#define VirtualTransactionIdEquals(vxid1, vxid2) \
	((vxid1).backendId == (vxid2).backendId && \
	 (vxid1).localTransactionId == (vxid2).localTransactionId)
#define SetInvalidVirtualTransactionId(vxid) \
	((vxid).backendId = InvalidBackendId, \
	 (vxid).localTransactionId = InvalidLocalTransactionId)
/* Fields are assigned separately; see comment above VirtualTransactionId */
#define GET_VXID_FROM_PGPROC(vxid, proc) \
	((vxid).backendId = (proc).backendId, \
	 (vxid).localTransactionId = (proc).lxid)
|
|
|
|
|
|
|
|
|
2004-08-26 19:22:28 +02:00
|
|
|
/*
 * LOCKMODE is an integer (1..N) indicating a lock type.  LOCKMASK is a bit
 * mask indicating a set of held or requested lock types (the bit 1<<mode
 * corresponds to a particular lock mode).
 */
typedef int LOCKMASK;
typedef int LOCKMODE;

/* MAX_LOCKMODES cannot be larger than the # of bits in LOCKMASK */
#define MAX_LOCKMODES		10
|
1996-08-28 03:59:28 +02:00
|
|
|
|
/*
 * Convert a lock mode number into the corresponding bitmask for a LOCKMASK
 * (LOCKBIT_ON), or into the complement mask used to clear that bit
 * (LOCKBIT_OFF).
 */
#define LOCKBIT_ON(lockmode) (1 << (lockmode))
#define LOCKBIT_OFF(lockmode) (~(1 << (lockmode)))
|
|
|
|
|
2004-08-27 19:07:42 +02:00
|
|
|
|
2001-06-22 02:04:59 +02:00
|
|
|
/*
|
2005-12-09 02:22:04 +01:00
|
|
|
* This data structure defines the locking semantics associated with a
|
|
|
|
* "lock method". The semantics specify the meaning of each lock mode
|
Overdue code review for transaction-level advisory locks patch.
Commit 62c7bd31c8878dd45c9b9b2429ab7a12103f3590 had assorted problems, most
visibly that it broke PREPARE TRANSACTION in the presence of session-level
advisory locks (which should be ignored by PREPARE), as per a recent
complaint from Stephen Rees. More abstractly, the patch made the
LockMethodData.transactional flag not merely useless but outright
dangerous, because in point of fact that flag no longer tells you anything
at all about whether a lock is held transactionally. This fix therefore
removes that flag altogether. We now rely entirely on the convention
already in use in lock.c that transactional lock holds must be owned by
some ResourceOwner, while session holds are never so owned. Setting the
locallock struct's owner link to NULL thus denotes a session hold, and
there is no redundant marker for that.
PREPARE TRANSACTION now works again when there are session-level advisory
locks, and it is also able to transfer transactional advisory locks to the
prepared transaction, but for implementation reasons it throws an error if
we hold both types of lock on a single lockable object. Perhaps it will be
worth improving that someday.
Assorted other minor cleanup and documentation editing, as well.
Back-patch to 9.1, except that in the 9.1 branch I did not remove the
LockMethodData.transactional flag for fear of causing an ABI break for
any external code that might be examining those structs.
2012-05-04 23:43:27 +02:00
|
|
|
* (by defining which lock modes it conflicts with).
|
2005-12-09 02:22:04 +01:00
|
|
|
* All of this data is constant and is kept in const tables.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
2005-12-09 02:22:04 +01:00
|
|
|
* numLockModes -- number of lock modes (READ,WRITE,etc) that
|
|
|
|
* are defined in this lock method. Must be less than MAX_LOCKMODES.
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
* conflictTab -- this is an array of bitmasks showing lock
|
2005-12-09 02:22:04 +01:00
|
|
|
* mode conflicts. conflictTab[i] is a mask with the j-th bit
|
|
|
|
* turned on if lock modes i and j conflict. Lock modes are
|
|
|
|
* numbered 1..numLockModes; conflictTab[0] is unused.
|
|
|
|
*
|
|
|
|
* lockModeNames -- ID strings for debug printouts.
|
|
|
|
*
|
Overdue code review for transaction-level advisory locks patch.
Commit 62c7bd31c8878dd45c9b9b2429ab7a12103f3590 had assorted problems, most
visibly that it broke PREPARE TRANSACTION in the presence of session-level
advisory locks (which should be ignored by PREPARE), as per a recent
complaint from Stephen Rees. More abstractly, the patch made the
LockMethodData.transactional flag not merely useless but outright
dangerous, because in point of fact that flag no longer tells you anything
at all about whether a lock is held transactionally. This fix therefore
removes that flag altogether. We now rely entirely on the convention
already in use in lock.c that transactional lock holds must be owned by
some ResourceOwner, while session holds are never so owned. Setting the
locallock struct's owner link to NULL thus denotes a session hold, and
there is no redundant marker for that.
PREPARE TRANSACTION now works again when there are session-level advisory
locks, and it is also able to transfer transactional advisory locks to the
prepared transaction, but for implementation reasons it throws an error if
we hold both types of lock on a single lockable object. Perhaps it will be
worth improving that someday.
Assorted other minor cleanup and documentation editing, as well.
Back-patch to 9.1, except that in the 9.1 branch I did not remove the
LockMethodData.transactional flag for fear of causing an ABI break for
any external code that might be examining those structs.
2012-05-04 23:43:27 +02:00
|
|
|
* trace_flag -- pointer to GUC trace flag for this lock method. (The
|
|
|
|
* GUC variable is not constant, but we use "const" here to denote that
|
|
|
|
* it can't be changed through this reference.)
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
typedef struct LockMethodData
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
2004-08-29 07:07:03 +02:00
|
|
|
int numLockModes;
|
2005-12-09 02:22:04 +01:00
|
|
|
const LOCKMASK *conflictTab;
|
2006-10-04 02:30:14 +02:00
|
|
|
const char *const * lockModeNames;
|
2005-12-09 02:22:04 +01:00
|
|
|
const bool *trace_flag;
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
} LockMethodData;
|
2004-08-27 19:07:42 +02:00
|
|
|
|
2005-12-09 02:22:04 +01:00
|
|
|
typedef const LockMethodData *LockMethod;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock methods are identified by LOCKMETHODID. (Despite the declaration as
|
|
|
|
* uint16, we are constrained to 256 lockmethods by the layout of LOCKTAG.)
|
|
|
|
*/
|
|
|
|
typedef uint16 LOCKMETHODID;
|
|
|
|
|
|
|
|
/* These identify the known lock methods */
|
|
|
|
#define DEFAULT_LOCKMETHOD 1
|
|
|
|
#define USER_LOCKMETHOD 2
|
|
|
|
|
|
|
|
/*
 * These are the valid values of type LOCKMODE for all the standard lock
 * methods (both DEFAULT and USER).
 */

/* NoLock is not a lock mode, but a flag value meaning "don't get a lock" */
#define NoLock					0

#define AccessShareLock			1	/* SELECT */
#define RowShareLock			2	/* SELECT FOR UPDATE/FOR SHARE */
#define RowExclusiveLock		3	/* INSERT, UPDATE, DELETE */
#define ShareUpdateExclusiveLock 4	/* VACUUM (non-FULL),ANALYZE, CREATE
									 * INDEX CONCURRENTLY */
#define ShareLock				5	/* CREATE INDEX (WITHOUT CONCURRENTLY) */
#define ShareRowExclusiveLock	6	/* like EXCLUSIVE MODE, but allows ROW
									 * SHARE */
#define ExclusiveLock			7	/* blocks ROW SHARE/SELECT...FOR
									 * UPDATE */
#define AccessExclusiveLock		8	/* ALTER TABLE, DROP TABLE, VACUUM
									 * FULL, and unqualified LOCK TABLE */
|
1996-08-28 03:59:28 +02:00
|
|
|
|
1998-08-25 23:20:32 +02:00
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
/*
 * LOCKTAG is the key information needed to look up a LOCK item in the
 * lock hashtable.  A LOCKTAG value uniquely identifies a lockable object.
 *
 * The LockTagType enum defines the different kinds of objects we can lock.
 * We can handle up to 256 different LockTagTypes.
 */
typedef enum LockTagType
{
	LOCKTAG_RELATION,			/* whole relation */
	/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
	LOCKTAG_RELATION_EXTEND,	/* the right to extend a relation */
	/* same ID info as RELATION */
	LOCKTAG_PAGE,				/* one page of a relation */
	/* ID info for a page is RELATION info + BlockNumber */
	LOCKTAG_TUPLE,				/* one physical tuple */
	/* ID info for a tuple is PAGE info + OffsetNumber */
	LOCKTAG_TRANSACTION,		/* transaction (for waiting for xact done) */
	/* ID info for a transaction is its TransactionId */
	LOCKTAG_VIRTUALTRANSACTION, /* virtual transaction (ditto) */
	/* ID info for a virtual transaction is its VirtualTransactionId */
	LOCKTAG_OBJECT,				/* non-relation database object */
	/* ID info for an object is DB OID + CLASS OID + OBJECT OID + SUBID */

	/*
	 * Note: object ID has same representation as in pg_depend and
	 * pg_description, but notice that we are constraining SUBID to 16 bits.
	 * Also, we use DB OID = 0 for shared objects such as tablespaces.
	 */
	LOCKTAG_USERLOCK,			/* reserved for old contrib/userlock code */
	LOCKTAG_ADVISORY			/* advisory user locks */
} LockTagType;

#define LOCKTAG_LAST_TYPE	LOCKTAG_ADVISORY
|
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
|
|
|
|
* The LOCKTAG struct is defined with malice aforethought to fit into 16
|
|
|
|
* bytes with no padding. Note that this would need adjustment if we were
|
|
|
|
* to widen Oid, BlockNumber, or TransactionId to more than 32 bits.
|
|
|
|
*
|
|
|
|
* We include lockmethodid in the locktag so that a single hash table in
|
2005-12-09 02:22:04 +01:00
|
|
|
* shared memory can store locks of different lockmethods.
|
2005-04-30 00:28:24 +02:00
|
|
|
*/
|
|
|
|
typedef struct LOCKTAG
|
|
|
|
{
|
2005-10-15 04:49:52 +02:00
|
|
|
uint32 locktag_field1; /* a 32-bit ID field */
|
|
|
|
uint32 locktag_field2; /* a 32-bit ID field */
|
|
|
|
uint32 locktag_field3; /* a 32-bit ID field */
|
|
|
|
uint16 locktag_field4; /* a 16-bit ID field */
|
|
|
|
uint8 locktag_type; /* see enum LockTagType */
|
2005-04-30 00:28:24 +02:00
|
|
|
uint8 locktag_lockmethodid; /* lockmethod indicator */
|
2000-12-22 01:51:54 +01:00
|
|
|
} LOCKTAG;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/*
 * These macros define how we map logical IDs of lockable objects into
 * the physical fields of LOCKTAG.  Use these to set up LOCKTAG values,
 * rather than accessing the fields directly.  Note multiple eval of target!
 */
#define SET_LOCKTAG_RELATION(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_RELATION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_PAGE, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (reloid), \
	 (locktag).locktag_field3 = (blocknum), \
	 (locktag).locktag_field4 = (offnum), \
	 (locktag).locktag_type = LOCKTAG_TUPLE, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_TRANSACTION(locktag,xid) \
	((locktag).locktag_field1 = (xid), \
	 (locktag).locktag_field2 = 0, \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_TRANSACTION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \
	((locktag).locktag_field1 = (vxid).backendId, \
	 (locktag).locktag_field2 = (vxid).localTransactionId, \
	 (locktag).locktag_field3 = 0, \
	 (locktag).locktag_field4 = 0, \
	 (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

#define SET_LOCKTAG_OBJECT(locktag,dboid,classoid,objoid,objsubid) \
	((locktag).locktag_field1 = (dboid), \
	 (locktag).locktag_field2 = (classoid), \
	 (locktag).locktag_field3 = (objoid), \
	 (locktag).locktag_field4 = (objsubid), \
	 (locktag).locktag_type = LOCKTAG_OBJECT, \
	 (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)

/* Advisory locks are the only tags using the USER lock method */
#define SET_LOCKTAG_ADVISORY(locktag,id1,id2,id3,id4) \
	((locktag).locktag_field1 = (id1), \
	 (locktag).locktag_field2 = (id2), \
	 (locktag).locktag_field3 = (id3), \
	 (locktag).locktag_field4 = (id4), \
	 (locktag).locktag_type = LOCKTAG_ADVISORY, \
	 (locktag).locktag_lockmethodid = USER_LOCKMETHOD)
|
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
|
|
|
|
/*
|
2000-12-22 01:51:54 +01:00
|
|
|
* Per-locked-object lock information:
|
1996-08-28 03:59:28 +02:00
|
|
|
*
|
|
|
|
* tag -- uniquely identifies the object being locked
|
2001-01-16 07:11:34 +01:00
|
|
|
* grantMask -- bitmask for all lock types currently granted on this object.
|
|
|
|
* waitMask -- bitmask for all lock types currently awaited on this object.
|
2004-08-27 19:07:42 +02:00
|
|
|
* procLocks -- list of PROCLOCK objects for this lock.
|
2001-01-16 07:11:34 +01:00
|
|
|
* waitProcs -- queue of processes waiting for this lock.
|
|
|
|
* requested -- count of each lock type currently requested on the lock
|
|
|
|
* (includes requests already granted!!).
|
|
|
|
* nRequested -- total requested locks of all types.
|
|
|
|
* granted -- count of each lock type currently granted on the lock.
|
|
|
|
* nGranted -- total granted locks of all types.
|
2005-06-15 00:15:33 +02:00
|
|
|
*
|
|
|
|
* Note: these counts count 1 for each backend. Internally to a backend,
|
|
|
|
* there may be multiple grabs on a particular lock, but this is not reflected
|
|
|
|
* into shared memory.
|
1996-08-28 03:59:28 +02:00
|
|
|
*/
|
1998-06-30 04:33:34 +02:00
|
|
|
typedef struct LOCK
|
1997-09-07 07:04:48 +02:00
|
|
|
{
|
|
|
|
/* hash key */
|
2001-01-16 07:11:34 +01:00
|
|
|
LOCKTAG tag; /* unique identifier of lockable object */
|
1997-09-07 07:04:48 +02:00
|
|
|
|
|
|
|
/* data */
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 22:59:25 +01:00
|
|
|
LOCKMASK grantMask; /* bitmask for lock types already granted */
|
|
|
|
LOCKMASK waitMask; /* bitmask for lock types awaited */
|
2005-10-15 04:49:52 +02:00
|
|
|
SHM_QUEUE procLocks; /* list of PROCLOCK objects assoc. with lock */
|
2002-06-11 15:40:53 +02:00
|
|
|
PROC_QUEUE waitProcs; /* list of PGPROC objects waiting on lock */
|
2005-10-15 04:49:52 +02:00
|
|
|
int requested[MAX_LOCKMODES]; /* counts of requested locks */
|
2001-01-16 07:11:34 +01:00
|
|
|
int nRequested; /* total of requested[] array */
|
2001-03-22 05:01:46 +01:00
|
|
|
int granted[MAX_LOCKMODES]; /* counts of granted locks */
|
2001-01-16 07:11:34 +01:00
|
|
|
int nGranted; /* total of granted[] array */
|
1997-09-08 23:56:23 +02:00
|
|
|
} LOCK;
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
#define LOCK_LOCKMETHOD(lock) ((LOCKMETHODID) (lock).tag.locktag_lockmethodid)
|
1996-08-28 03:59:28 +02:00
|
|
|
|
2000-12-22 01:51:54 +01:00
|
|
|
|
|
|
|
/*
|
2005-06-15 00:15:33 +02:00
|
|
|
* We may have several different backends holding or awaiting locks
|
2004-08-26 19:22:28 +02:00
|
|
|
* on the same lockable object. We need to store some per-holder/waiter
|
|
|
|
* information for each such holder (or would-be holder). This is kept in
|
|
|
|
* a PROCLOCK struct.
|
2000-12-22 01:51:54 +01:00
|
|
|
*
|
2002-07-19 02:17:40 +02:00
|
|
|
* PROCLOCKTAG is the key information needed to look up a PROCLOCK item in the
|
2014-05-06 18:12:18 +02:00
|
|
|
* proclock hashtable. A PROCLOCKTAG value uniquely identifies the combination
|
2006-07-24 01:08:46 +02:00
|
|
|
* of a lockable object and a holder/waiter for that object. (We can use
|
|
|
|
* pointers here because the PROCLOCKTAG need only be unique for the lifespan
|
|
|
|
* of the PROCLOCK, and it will never outlive the lock or the proc.)
|
2000-12-22 01:51:54 +01:00
|
|
|
*
|
2005-06-15 00:15:33 +02:00
|
|
|
* Internally to a backend, it is possible for the same lock to be held
|
|
|
|
* for different purposes: the backend tracks transaction locks separately
|
2014-05-06 18:12:18 +02:00
|
|
|
* from session locks. However, this is not reflected in the shared-memory
|
2005-06-15 00:15:33 +02:00
|
|
|
* state: we only track which backend(s) hold the lock. This is OK since a
|
|
|
|
* backend can never block itself.
|
2001-01-16 07:11:34 +01:00
|
|
|
*
|
2004-08-27 19:07:42 +02:00
|
|
|
* The holdMask field shows the already-granted locks represented by this
|
|
|
|
* proclock. Note that there will be a proclock object, possibly with
|
|
|
|
* zero holdMask, for any lock that the process is currently waiting on.
|
|
|
|
* Otherwise, proclock objects whose holdMasks are zero are recycled
|
2001-01-16 07:11:34 +01:00
|
|
|
* as soon as convenient.
|
2001-01-22 23:30:06 +01:00
|
|
|
*
|
2005-06-15 00:15:33 +02:00
|
|
|
* releaseMask is workspace for LockReleaseAll(): it shows the locks due
|
2014-05-06 18:12:18 +02:00
|
|
|
* to be released during the current call. This must only be examined or
|
2005-06-15 00:15:33 +02:00
|
|
|
* set by the backend owning the PROCLOCK.
|
|
|
|
*
|
2004-07-01 02:52:04 +02:00
|
|
|
* Each PROCLOCK object is linked into lists for both the associated LOCK
|
|
|
|
* object and the owning PGPROC object. Note that the PROCLOCK is entered
|
|
|
|
* into these lists as soon as it is created, even if no lock has yet been
|
|
|
|
* granted. A PGPROC that is waiting for a lock to be granted will also be
|
|
|
|
* linked into the lock's waitProcs queue.
|
2000-12-22 01:51:54 +01:00
|
|
|
*/
|
2002-07-19 02:17:40 +02:00
|
|
|
typedef struct PROCLOCKTAG
|
2000-12-22 01:51:54 +01:00
|
|
|
{
|
2006-07-24 01:08:46 +02:00
|
|
|
/* NB: we assume this struct contains no padding! */
|
|
|
|
LOCK *myLock; /* link to per-lockable-object information */
|
|
|
|
PGPROC *myProc; /* link to PGPROC of owning backend */
|
2002-07-19 02:17:40 +02:00
|
|
|
} PROCLOCKTAG;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2002-07-19 02:17:40 +02:00
|
|
|
typedef struct PROCLOCK
|
2000-12-22 01:51:54 +01:00
|
|
|
{
|
|
|
|
/* tag */
|
2003-02-18 03:13:24 +01:00
|
|
|
PROCLOCKTAG tag; /* unique identifier of proclock object */
|
2000-12-22 01:51:54 +01:00
|
|
|
|
|
|
|
/* data */
|
2004-08-27 19:07:42 +02:00
|
|
|
LOCKMASK holdMask; /* bitmask for lock types currently held */
|
2005-06-15 00:15:33 +02:00
|
|
|
LOCKMASK releaseMask; /* bitmask for lock types to be released */
|
|
|
|
SHM_QUEUE lockLink; /* list link in LOCK's list of proclocks */
|
|
|
|
SHM_QUEUE procLink; /* list link in PGPROC's list of proclocks */
|
2002-07-19 02:17:40 +02:00
|
|
|
} PROCLOCK;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2003-02-18 03:13:24 +01:00
|
|
|
#define PROCLOCK_LOCKMETHOD(proclock) \
|
2006-07-24 01:08:46 +02:00
|
|
|
LOCK_LOCKMETHOD(*((proclock).tag.myLock))
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2004-08-27 19:07:42 +02:00
|
|
|
/*
|
|
|
|
* Each backend also maintains a local hash table with information about each
|
2014-05-06 18:12:18 +02:00
|
|
|
* lock it is currently interested in. In particular the local table counts
|
2004-08-27 19:07:42 +02:00
|
|
|
* the number of times that lock has been acquired. This allows multiple
|
|
|
|
* requests for the same lock to be executed without additional accesses to
|
|
|
|
* shared memory. We also track the number of lock acquisitions per
|
|
|
|
* ResourceOwner, so that we can release just those locks belonging to a
|
|
|
|
* particular ResourceOwner.
|
2013-11-28 00:10:00 +01:00
|
|
|
*
|
|
|
|
* When holding a lock taken "normally", the lock and proclock fields always
|
|
|
|
* point to the associated objects in shared memory. However, if we acquired
|
|
|
|
* the lock via the fast-path mechanism, the lock and proclock fields are set
|
|
|
|
* to NULL, since there probably aren't any such objects in shared memory.
|
|
|
|
* (If the lock later gets promoted to normal representation, we may eventually
|
|
|
|
* update our locallock's lock/proclock fields after finding the shared
|
|
|
|
* objects.)
|
|
|
|
*
|
|
|
|
* Caution: a locallock object can be left over from a failed lock acquisition
|
|
|
|
* attempt. In this case its lock/proclock fields are untrustworthy, since
|
|
|
|
* the shared lock object is neither held nor awaited, and hence is available
|
|
|
|
* to be reclaimed. If nLocks > 0 then these pointers must either be valid or
|
|
|
|
* NULL, but when nLocks == 0 they should be considered garbage.
|
2004-08-27 19:07:42 +02:00
|
|
|
*/
|
|
|
|
typedef struct LOCALLOCKTAG
|
|
|
|
{
|
|
|
|
LOCKTAG lock; /* identifies the lockable object */
|
|
|
|
LOCKMODE mode; /* lock mode for this table entry */
|
|
|
|
} LOCALLOCKTAG;
|
|
|
|
|
|
|
|
/*
 * LOCALLOCKOWNER: per-ResourceOwner count of holds on one locallock.
 * Entries of this type make up a LOCALLOCK's lockOwners array.
 */
typedef struct LOCALLOCKOWNER
{
	/*
	 * Note: if owner is NULL then the lock is held on behalf of the session;
	 * otherwise it is held on behalf of my current transaction.
	 *
	 * Must use a forward struct reference to avoid circularity.
	 */
	struct ResourceOwnerData *owner;
	int64		nLocks;			/* # of times held by this owner */
} LOCALLOCKOWNER;
|
|
|
|
|
|
|
|
/*
 * LOCALLOCK: backend-private state for one (lockable object, lockmode) pair.
 * Caches hold counts so repeated acquisitions of the same lock need not
 * touch shared memory; see the comment block above for the lifetime rules
 * governing the lock/proclock pointers.
 */
typedef struct LOCALLOCK
{
	/* tag */
	LOCALLOCKTAG tag;			/* unique identifier of locallock entry */

	/* data */
	LOCK	   *lock;			/* associated LOCK object, if any */
	PROCLOCK   *proclock;		/* associated PROCLOCK object, if any */
	uint32		hashcode;		/* copy of LOCKTAG's hash value */
	int64		nLocks;			/* total number of times lock is held */
	int			numLockOwners;	/* # of relevant ResourceOwners */
	int			maxLockOwners;	/* allocated size of array */
	bool		holdsStrongLockCount;	/* bumped FastPathStrongRelationLocks */
	LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
} LOCALLOCK;
|
|
|
|
|
2005-04-30 00:28:24 +02:00
|
|
|
/* Extract the lock method ID from a LOCALLOCK (by value, not pointer) */
#define LOCALLOCK_LOCKMETHOD(llock) ((llock).tag.lock.locktag_lockmethodid)
|
2004-08-27 19:07:42 +02:00
|
|
|
|
|
|
|
|
2002-08-17 15:04:19 +02:00
|
|
|
/*
 * These structures hold information passed from lmgr internals to the lock
 * listing user-level functions (in lockfuncs.c).
 */
typedef struct LockInstanceData
{
	LOCKTAG		locktag;		/* locked object */
	LOCKMASK	holdMask;		/* locks held by this PGPROC */
	LOCKMODE	waitLockMode;	/* lock awaited by this PGPROC, if any */
	BackendId	backend;		/* backend ID of this PGPROC */
	LocalTransactionId lxid;	/* local transaction ID of this PGPROC */
	int			pid;			/* pid of this PGPROC */
	bool		fastpath;		/* taken via fastpath? */
} LockInstanceData;
|
|
|
|
|
2006-07-24 01:08:46 +02:00
|
|
|
/* Container returned by GetLockStatusData(): an array of lock instances */
typedef struct LockData
{
	int			nelements;		/* The length of the array */
	LockInstanceData *locks;	/* array of nelements entries */
} LockData;
|
2000-12-22 01:51:54 +01:00
|
|
|
|
2004-05-28 07:13:32 +02:00
|
|
|
|
2005-05-30 00:45:02 +02:00
|
|
|
/* Result codes for LockAcquire() */
typedef enum
{
	LOCKACQUIRE_NOT_AVAIL,		/* lock not available, and dontWait=true */
	LOCKACQUIRE_OK,				/* lock successfully acquired */
	LOCKACQUIRE_ALREADY_HELD	/* incremented count for lock already held */
} LockAcquireResult;
|
|
|
|
|
2007-05-30 18:16:32 +02:00
|
|
|
/* Deadlock states identified by DeadLockCheck() */
typedef enum
{
	DS_NOT_YET_CHECKED,			/* no deadlock check has run yet */
	DS_NO_DEADLOCK,				/* no deadlock detected */
	DS_SOFT_DEADLOCK,			/* deadlock avoided by queue rearrangement */
	DS_HARD_DEADLOCK,			/* deadlock, no way out but ERROR */
	DS_BLOCKED_BY_AUTOVACUUM	/* no deadlock; queue blocked by autovacuum
								 * worker */
} DeadLockState;
|
2007-06-19 22:13:22 +02:00
|
|
|
|
2005-05-30 00:45:02 +02:00
|
|
|
|
2006-07-24 01:08:46 +02:00
|
|
|
/*
 * The lockmgr's shared hash tables are partitioned to reduce contention.
 * To determine which partition a given locktag belongs to, compute the tag's
 * hash code with LockTagHashCode(), then apply one of these macros.
 * NB: NUM_LOCK_PARTITIONS must be a power of 2!
 */
#define LockHashPartition(hashcode) \
	((hashcode) % NUM_LOCK_PARTITIONS)
/* LWLock protecting the partition that 'hashcode' maps to */
#define LockHashPartitionLock(hashcode) \
	(&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \
					  LockHashPartition(hashcode)].lock)
/* LWLock for partition 'i' directly, for iterating over all partitions */
#define LockHashPartitionLockByIndex(i) \
	(&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
|
2006-07-24 01:08:46 +02:00
|
|
|
|
1996-08-28 03:59:28 +02:00
|
|
|
/*
 * function prototypes
 */
extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(const LOCK *lock);
extern uint32 LockTagHashCode(const LOCKTAG *locktag);
extern bool DoLockModesConflict(LOCKMODE mode1, LOCKMODE mode2);

/* lock acquisition and release */
extern LockAcquireResult LockAcquire(const LOCKTAG *locktag,
			LOCKMODE lockmode,
			bool sessionLock,
			bool dontWait);
extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag,
					 LOCKMODE lockmode,
					 bool sessionLock,
					 bool dontWait,
					 bool report_memory_error);
extern void AbortStrongLockAcquire(void);
extern bool LockRelease(const LOCKTAG *locktag,
			LOCKMODE lockmode, bool sessionLock);
extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
extern void LockReleaseSession(LOCKMETHODID lockmethodid);
extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
extern void LockReassignCurrentOwner(LOCALLOCK **locallocks, int nlocks);

/* lock inspection */
extern bool LockHasWaiters(const LOCKTAG *locktag,
			   LOCKMODE lockmode, bool sessionLock);
extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag,
				 LOCKMODE lockmode);

/* two-phase commit support */
extern void AtPrepare_Locks(void);
extern void PostPrepare_Locks(TransactionId xid);

/* lower-level grant/conflict primitives */
extern int	LockCheckConflicts(LockMethod lockMethodTable,
				LOCKMODE lockmode,
				LOCK *lock, PROCLOCK *proclock);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
extern void RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode);
extern Size LockShmemSize(void);
extern LockData *GetLockStatusData(void);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
|
|
|
|
|
|
|
/*
 * Identifies one AccessExclusiveLock held by a running transaction.
 * NOTE(review): the xl_ prefix suggests this is WAL record data used by
 * Hot Standby replay (see GetRunningTransactionLocks and
 * lock_twophase_standby_recover) -- confirm against standby.c usage.
 */
typedef struct xl_standby_lock
{
	TransactionId xid;			/* xid of holder of AccessExclusiveLock */
	Oid			dbOid;			/* database containing the locked relation */
	Oid			relOid;			/* OID of the locked relation */
} xl_standby_lock;
|
|
|
|
|
|
|
|
extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks);
extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode);

/* two-phase state-record callbacks for lock state */
extern void lock_twophase_recover(TransactionId xid, uint16 info,
					  void *recdata, uint32 len);
extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
						 void *recdata, uint32 len);
extern void lock_twophase_postabort(TransactionId xid, uint16 info,
						void *recdata, uint32 len);
extern void lock_twophase_standby_recover(TransactionId xid, uint16 info,
							  void *recdata, uint32 len);

/* deadlock detection (results reported as DeadLockState, above) */
extern DeadLockState DeadLockCheck(PGPROC *proc);
extern PGPROC *GetBlockingAutoVacuumPgproc(void);
extern void DeadLockReport(void) __attribute__((noreturn));
extern void RememberSimpleDeadLock(PGPROC *proc1,
					   LOCKMODE lockmode,
					   LOCK *lock,
					   PGPROC *proc2);
extern void InitDeadLockChecking(void);
|
|
|
|
|
2000-05-31 02:28:42 +02:00
|
|
|
#ifdef LOCK_DEBUG
|
2005-06-18 00:32:51 +02:00
|
|
|
extern void DumpLocks(PGPROC *proc);
|
1998-08-25 23:20:32 +02:00
|
|
|
extern void DumpAllLocks(void);
|
1997-02-12 06:25:13 +01:00
|
|
|
#endif
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2011-08-04 18:38:33 +02:00
|
|
|
/* Lock a VXID (used to wait for a transaction to finish) */
extern void VirtualXactLockTableInsert(VirtualTransactionId vxid);
extern void VirtualXactLockTableCleanup(void);
extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait);
|
|
|
|
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif   /* LOCK_H_ */
|