/*-------------------------------------------------------------------------
 *
 * lwlock.h
 *	  Lightweight lock manager
 *
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/lwlock.h
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#ifndef LWLOCK_H
|
|
|
|
#define LWLOCK_H
|
|
|
|
|
/*
 * Declaring NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS here is a bit
 * odd, but enum LWLockId below needs them, and having this header include
 * bufmgr.h or lock.h would invert the include hierarchy.
 */

/* Number of partitions of the shared buffer mapping hashtable */
#define NUM_BUFFER_PARTITIONS  16

/* Number of partitions the shared lock tables are divided into */
#define LOG2_NUM_LOCK_PARTITIONS  4
#define NUM_LOCK_PARTITIONS  (1 << LOG2_NUM_LOCK_PARTITIONS)

/* Number of partitions the shared predicate lock tables are divided into */
#define LOG2_NUM_PREDICATELOCK_PARTITIONS  4
#define NUM_PREDICATELOCK_PARTITIONS  (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)

/*
 * There is a fixed set of predefined LWLocks, followed by a batch of
 * LWLocks that are assigned dynamically at runtime (e.g., one per shared
 * buffer partition).  The LWLock structures themselves live in shared
 * memory (they contain shared state) and are referred to by values of
 * this enumerated type.  We deliberately abuse the enum: lock IDs beyond
 * those listed here are handed out at runtime.  MaxDynamicLWLock exists
 * only to force the compiler to pick a representation wider than
 * char/short for the enum.
 *
 * If a lock is ever removed, replace it with a placeholder rather than
 * deleting the entry: keeping the numbering stable matters to DTrace and
 * other external debugging scripts.
 */
typedef enum LWLockId
{
	BufFreelistLock,
	ShmemIndexLock,
	OidGenLock,
	XidGenLock,
	ProcArrayLock,
	SInvalReadLock,
	SInvalWriteLock,
	WALInsertLock,
	WALWriteLock,
	ControlFileLock,
	CheckpointLock,
	CLogControlLock,
	SubtransControlLock,
	MultiXactGenLock,
	MultiXactOffsetControlLock,
	MultiXactMemberControlLock,
	RelCacheInitLock,
	BgWriterCommLock,
	TwoPhaseStateLock,
	TablespaceCreateLock,
	BtreeVacuumLock,
	AddinShmemInitLock,
	AutovacuumLock,
	AutovacuumScheduleLock,
	SyncScanLock,
	RelationMappingLock,
	AsyncCtlLock,
	AsyncQueueLock,
	SerializableXactHashLock,
	SerializableFinishedListLock,
	SerializablePredicateLockListLock,
	OldSerXidLock,
	SyncRepLock,
	/* Individual lock IDs end here */
	FirstBufMappingLock,
	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
	FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,

	/* must be last except for MaxDynamicLWLock: */
	NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,

	MaxDynamicLWLock = 1000000000
} LWLockId;
|
|
|
|
|
|
|
|
|
|
|
|
/* Modes in which an LWLock may be requested/held. */
typedef enum LWLockMode
{
	LW_EXCLUSIVE,				/* exclusive access */
	LW_SHARED					/* shared access */
} LWLockMode;
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LOCK_DEBUG
|
|
|
|
extern bool Trace_lwlocks;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
extern LWLockId LWLockAssign(void);
|
|
|
|
extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
|
|
|
|
extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
|
|
|
|
extern void LWLockRelease(LWLockId lockid);
|
|
|
|
extern void LWLockReleaseAll(void);
|
2004-06-11 18:43:24 +02:00
|
|
|
extern bool LWLockHeldByMe(LWLockId lockid);
|
2001-09-29 06:02:27 +02:00
|
|
|
|
|
|
|
extern int NumLWLocks(void);
|
2005-08-21 01:26:37 +02:00
|
|
|
extern Size LWLockShmemSize(void);
|
2001-09-29 06:02:27 +02:00
|
|
|
extern void CreateLWLocks(void);
|
2001-10-28 07:26:15 +01:00
|
|
|
|
2006-10-16 00:04:08 +02:00
|
|
|
extern void RequestAddinLWLocks(int n);
|
2006-08-01 21:03:11 +02:00
|
|
|
|
2001-11-05 18:46:40 +01:00
|
|
|
#endif /* LWLOCK_H */
|