/*-------------------------------------------------------------------------
 *
 * lwlock.h
 *	  Lightweight lock manager
 *
 *
 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/lwlock.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef LWLOCK_H
#define LWLOCK_H

/*
 * It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
 * here, but we need them to set up enum LWLockId correctly, and having
 * this file include lock.h or bufmgr.h would be backwards.
 */

/* Number of partitions of the shared buffer mapping hashtable */
#define NUM_BUFFER_PARTITIONS 16

/* Number of partitions the shared lock tables are divided into */
#define LOG2_NUM_LOCK_PARTITIONS 4
/* Derived from the log2 constant above, so it is always a power of 2 */
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
/* Number of partitions the shared predicate lock tables are divided into */
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
/* Derived from the log2 constant above, so it is always a power of 2 */
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
/*
 * We have a number of predefined LWLocks, plus a bunch of LWLocks that are
 * dynamically assigned (e.g., for shared buffers). The LWLock structures
 * live in shared memory (since they contain shared data) and are identified
 * by values of this enumerated type. We abuse the notion of an enum somewhat
 * by allowing values not listed in the enum declaration to be assigned.
 * The extra value MaxDynamicLWLock is there to keep the compiler from
 * deciding that the enum can be represented as char or short ...
 *
 * If you remove a lock, please replace it with a placeholder. This retains
 * the lock numbering, which is helpful for DTrace and other external
 * debugging scripts.
 */
typedef enum LWLockId
{
	/*
	 * Individually named fixed locks.  NOTE: enumerator order determines
	 * the numeric lock IDs, so per the comment above, do not reorder or
	 * delete entries without leaving a placeholder.
	 */
	BufFreelistLock,
	ShmemIndexLock,
	OidGenLock,
	XidGenLock,
	ProcArrayLock,
	SInvalReadLock,
	SInvalWriteLock,
	WALBufMappingLock,
	WALWriteLock,
	ControlFileLock,
	CheckpointLock,
	CLogControlLock,
	SubtransControlLock,
	MultiXactGenLock,
	MultiXactOffsetControlLock,
	MultiXactMemberControlLock,
	RelCacheInitLock,
	CheckpointerCommLock,
	TwoPhaseStateLock,
	TablespaceCreateLock,
	BtreeVacuumLock,
	AddinShmemInitLock,
	AutovacuumLock,
	AutovacuumScheduleLock,
	SyncScanLock,
	RelationMappingLock,
	AsyncCtlLock,
	AsyncQueueLock,
	SerializableXactHashLock,
	SerializableFinishedListLock,
	SerializablePredicateLockListLock,
	OldSerXidLock,
	SyncRepLock,
	BackgroundWorkerLock,
	DynamicSharedMemoryControlLock,
	AutoFileLock,
	/* Individual lock IDs end here */

	/*
	 * The "First..." members below each mark the start of a contiguous
	 * range of partition locks; the explicit value assignments reserve
	 * NUM_BUFFER_PARTITIONS, NUM_LOCK_PARTITIONS, and
	 * NUM_PREDICATELOCK_PARTITIONS IDs respectively for those ranges.
	 */
	FirstBufMappingLock,
	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
	FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,

	/* must be last except for MaxDynamicLWLock: */
	NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,

	/* Sentinel forcing the enum to be at least int-sized; see comment above */
	MaxDynamicLWLock = 1000000000
} LWLockId;
/* Modes in which an LWLock may be held or requested */
typedef enum LWLockMode
{
	LW_EXCLUSIVE,
	LW_SHARED,
	LW_WAIT_UNTIL_FREE	/* A special mode used in PGPROC->lwlockMode,
						 * when waiting for lock to become free. Not
						 * to be used as LWLockAcquire argument */
} LWLockMode;
#ifdef LOCK_DEBUG
/* GUC-style flag enabling lock tracing; only built under LOCK_DEBUG */
extern bool Trace_lwlocks;
#endif

/* Hand out the next unassigned dynamic LWLockId */
extern LWLockId LWLockAssign(void);
/* Acquire a lock in the given mode, blocking until it is obtained */
extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
/* Try to acquire without blocking; returns true on success */
extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
/*
 * Acquire the lock if free, else wait for it to be released but return
 * without holding it; the result reports whether the lock was acquired.
 * (See LW_WAIT_UNTIL_FREE in LWLockMode.)
 */
extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
/* Release a lock previously acquired by this process */
extern void LWLockRelease(LWLockId lockid);
/* Release all LWLocks held by this process (e.g. during error recovery) */
extern void LWLockReleaseAll(void);
/* Report whether the current process holds the given lock */
extern bool LWLockHeldByMe(LWLockId lockid);

/* Total number of LWLocks to create, fixed plus dynamic */
extern int	NumLWLocks(void);
/* Shared-memory space needed for all LWLocks */
extern Size LWLockShmemSize(void);
/* Initialize the LWLock array in shared memory */
extern void CreateLWLocks(void);

/* Reserve n dynamic LWLocks for use by an add-in (loadable module) */
extern void RequestAddinLWLocks(int n);

#endif   /* LWLOCK_H */