postgresql/src/include/storage/predicate.h

/*-------------------------------------------------------------------------
 *
 * predicate.h
 *	  POSTGRES public predicate locking definitions.
 *
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/storage/predicate.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef PREDICATE_H
#define PREDICATE_H

#include "utils/relcache.h"
#include "utils/snapshot.h"


/*
 * GUC variables
 */
extern int	max_predicate_locks_per_xact;


/* Number of SLRU buffers to use for predicate locking */
#define NUM_OLDSERXID_BUFFERS	16


/*
 * function prototypes
 */

/* housekeeping for shared memory predicate lock structures */
extern void InitPredicateLocks(void);
extern Size PredicateLockShmemSize(void);

extern void CheckPointPredicate(void);

/* predicate lock reporting */
extern bool PageIsPredicateLocked(Relation relation, BlockNumber blkno);

/* predicate lock maintenance */
extern Snapshot RegisterSerializableTransaction(Snapshot snapshot);
extern void RegisterPredicateLockingXid(TransactionId xid);
extern void PredicateLockRelation(Relation relation, Snapshot snapshot);
extern void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot);
extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot);
extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
extern void TransferPredicateLocksToHeapRelation(Relation relation);
extern void ReleasePredicateLocks(bool isCommit);

/* conflict detection (may also trigger rollback) */
extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple,
								Buffer buffer, Snapshot snapshot);
extern void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer);
extern void CheckTableForSerializableConflictIn(Relation relation);

/* final rollback checking */
extern void PreCommit_CheckForSerializationFailure(void);

/* two-phase commit support */
extern void AtPrepare_PredicateLocks(void);
extern void PostPrepare_PredicateLocks(TransactionId xid);
extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
extern void predicatelock_twophase_recover(TransactionId xid, uint16 info,
							   void *recdata, uint32 len);

#endif   /* PREDICATE_H */
Implement genuine serializable isolation level. Until now, our Serializable mode has in fact been what's called Snapshot Isolation, which allows some anomalies that could not occur in any serialized ordering of the transactions. This patch fixes that using a method called Serializable Snapshot Isolation, based on research papers by Michael J. Cahill (see README-SSI for full references). In Serializable Snapshot Isolation, transactions run like they do in Snapshot Isolation, but a predicate lock manager observes the reads and writes performed and aborts transactions if it detects that an anomaly might occur. This method produces some false positives, ie. it sometimes aborts transactions even though there is no anomaly. To track reads we implement predicate locking, see storage/lmgr/predicate.c. Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared memory is finite, so when a transaction takes many tuple-level locks on a page, the locks are promoted to a single page-level lock, and further to a single relation level lock if necessary. To lock key values with no matching tuple, a sequential scan always takes a relation-level lock, and an index scan acquires a page-level lock that covers the search key, whether or not there are any matching keys at the moment. A predicate lock doesn't conflict with any regular locks or with another predicate locks in the normal sense. They're only used by the predicate lock manager to detect the danger of anomalies. Only serializable transactions participate in predicate locking, so there should be no extra overhead for for other transactions. Predicate locks can't be released at commit, but must be remembered until all the transactions that overlapped with it have completed. That means that we need to remember an unbounded amount of predicate locks, so we apply a lossy but conservative method of tracking locks for committed transactions. If we run short of shared memory, we overflow to a new "pg_serial" SLRU pool. We don't currently allow Serializable transactions in Hot Standby mode. That would be hard, because even read-only transactions can cause anomalies that wouldn't otherwise occur. Serializable isolation mode now means the new fully serializable level. Repeatable Read gives you the old Snapshot Isolation level that we have always had. Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and Anssi Kääriäinen 2011-02-07 22:46:51 +01:00			`/*-------------------------------------------------------------------------`
			`*`
			`* predicate.h`
			`* POSTGRES public predicate locking definitions.`
			`*`
			`*`
			`* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group`
			`* Portions Copyright (c) 1994, Regents of the University of California`
			`*`
			`* src/include/storage/predicate.h`
			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#ifndef PREDICATE_H`
			`#define PREDICATE_H`

			`#include "utils/relcache.h"`
			`#include "utils/snapshot.h"`


			`/*`
			`* GUC variables`
			`*/`
			`extern int max_predicate_locks_per_xact;`


			`/* Number of SLRU buffers to use for predicate locking */`
			`#define NUM_OLDSERXID_BUFFERS 16`


			`/*`
			`* function prototypes`
			`*/`

			`/* housekeeping for shared memory predicate lock structures */`
			`extern void InitPredicateLocks(void);`
			`extern Size PredicateLockShmemSize(void);`

Truncate predicate lock manager's SLRU lazily at checkpoint. That's safer than doing it aggressively whenever the tail-XID pointer is advanced, because this way we don't need to do it while holding SerializableXactHashLock. This also fixes bug #5915 spotted by YAMAMOTO Takashi, and removes an obsolete comment spotted by Kevin Grittner. 2011-03-08 11:07:29 +01:00			`extern void CheckPointPredicate(void);`

Implement genuine serializable isolation level. Until now, our Serializable mode has in fact been what's called Snapshot Isolation, which allows some anomalies that could not occur in any serialized ordering of the transactions. This patch fixes that using a method called Serializable Snapshot Isolation, based on research papers by Michael J. Cahill (see README-SSI for full references). In Serializable Snapshot Isolation, transactions run like they do in Snapshot Isolation, but a predicate lock manager observes the reads and writes performed and aborts transactions if it detects that an anomaly might occur. This method produces some false positives, ie. it sometimes aborts transactions even though there is no anomaly. To track reads we implement predicate locking, see storage/lmgr/predicate.c. Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared memory is finite, so when a transaction takes many tuple-level locks on a page, the locks are promoted to a single page-level lock, and further to a single relation level lock if necessary. To lock key values with no matching tuple, a sequential scan always takes a relation-level lock, and an index scan acquires a page-level lock that covers the search key, whether or not there are any matching keys at the moment. A predicate lock doesn't conflict with any regular locks or with another predicate locks in the normal sense. They're only used by the predicate lock manager to detect the danger of anomalies. Only serializable transactions participate in predicate locking, so there should be no extra overhead for for other transactions. Predicate locks can't be released at commit, but must be remembered until all the transactions that overlapped with it have completed. That means that we need to remember an unbounded amount of predicate locks, so we apply a lossy but conservative method of tracking locks for committed transactions. If we run short of shared memory, we overflow to a new "pg_serial" SLRU pool. We don't currently allow Serializable transactions in Hot Standby mode. That would be hard, because even read-only transactions can cause anomalies that wouldn't otherwise occur. Serializable isolation mode now means the new fully serializable level. Repeatable Read gives you the old Snapshot Isolation level that we have always had. Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and Anssi Kääriäinen 2011-02-07 22:46:51 +01:00			`/* predicate lock reporting */`
Remove pointless const qualifiers from function arguments in the SSI code. As Tom Lane pointed out, "const Relation foo" doesn't guarantee that you can't modify the data the "foo" pointer points to. It just means that you can't change the pointer to point to something else within the function, which is not very useful. 2011-06-22 11:18:39 +02:00			`extern bool PageIsPredicateLocked(Relation relation, BlockNumber blkno);`
Implement genuine serializable isolation level. Until now, our Serializable mode has in fact been what's called Snapshot Isolation, which allows some anomalies that could not occur in any serialized ordering of the transactions. This patch fixes that using a method called Serializable Snapshot Isolation, based on research papers by Michael J. Cahill (see README-SSI for full references). In Serializable Snapshot Isolation, transactions run like they do in Snapshot Isolation, but a predicate lock manager observes the reads and writes performed and aborts transactions if it detects that an anomaly might occur. This method produces some false positives, ie. it sometimes aborts transactions even though there is no anomaly. To track reads we implement predicate locking, see storage/lmgr/predicate.c. Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared memory is finite, so when a transaction takes many tuple-level locks on a page, the locks are promoted to a single page-level lock, and further to a single relation level lock if necessary. To lock key values with no matching tuple, a sequential scan always takes a relation-level lock, and an index scan acquires a page-level lock that covers the search key, whether or not there are any matching keys at the moment. A predicate lock doesn't conflict with any regular locks or with another predicate locks in the normal sense. They're only used by the predicate lock manager to detect the danger of anomalies. Only serializable transactions participate in predicate locking, so there should be no extra overhead for for other transactions. Predicate locks can't be released at commit, but must be remembered until all the transactions that overlapped with it have completed. That means that we need to remember an unbounded amount of predicate locks, so we apply a lossy but conservative method of tracking locks for committed transactions. If we run short of shared memory, we overflow to a new "pg_serial" SLRU pool. We don't currently allow Serializable transactions in Hot Standby mode. That would be hard, because even read-only transactions can cause anomalies that wouldn't otherwise occur. Serializable isolation mode now means the new fully serializable level. Repeatable Read gives you the old Snapshot Isolation level that we have always had. Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and Anssi Kääriäinen 2011-02-07 22:46:51 +01:00
			`/* predicate lock maintenance */`
			`extern Snapshot RegisterSerializableTransaction(Snapshot snapshot);`
Remove pointless const qualifiers from function arguments in the SSI code. As Tom Lane pointed out, "const Relation foo" doesn't guarantee that you can't modify the data the "foo" pointer points to. It just means that you can't change the pointer to point to something else within the function, which is not very useful. 2011-06-22 11:18:39 +02:00			`extern void RegisterPredicateLockingXid(TransactionId xid);`
			`extern void PredicateLockRelation(Relation relation, Snapshot snapshot);`
			`extern void PredicateLockPage(Relation relation, BlockNumber blkno, Snapshot snapshot);`
			`extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot);`
			`extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);`
			`extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);`
			`extern void TransferPredicateLocksToHeapRelation(Relation relation);`
			`extern void ReleasePredicateLocks(bool isCommit);`
Implement genuine serializable isolation level. Until now, our Serializable mode has in fact been what's called Snapshot Isolation, which allows some anomalies that could not occur in any serialized ordering of the transactions. This patch fixes that using a method called Serializable Snapshot Isolation, based on research papers by Michael J. Cahill (see README-SSI for full references). In Serializable Snapshot Isolation, transactions run like they do in Snapshot Isolation, but a predicate lock manager observes the reads and writes performed and aborts transactions if it detects that an anomaly might occur. This method produces some false positives, ie. it sometimes aborts transactions even though there is no anomaly. To track reads we implement predicate locking, see storage/lmgr/predicate.c. Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared memory is finite, so when a transaction takes many tuple-level locks on a page, the locks are promoted to a single page-level lock, and further to a single relation level lock if necessary. To lock key values with no matching tuple, a sequential scan always takes a relation-level lock, and an index scan acquires a page-level lock that covers the search key, whether or not there are any matching keys at the moment. A predicate lock doesn't conflict with any regular locks or with another predicate locks in the normal sense. They're only used by the predicate lock manager to detect the danger of anomalies. Only serializable transactions participate in predicate locking, so there should be no extra overhead for for other transactions. Predicate locks can't be released at commit, but must be remembered until all the transactions that overlapped with it have completed. That means that we need to remember an unbounded amount of predicate locks, so we apply a lossy but conservative method of tracking locks for committed transactions. If we run short of shared memory, we overflow to a new "pg_serial" SLRU pool. We don't currently allow Serializable transactions in Hot Standby mode. That would be hard, because even read-only transactions can cause anomalies that wouldn't otherwise occur. Serializable isolation mode now means the new fully serializable level. Repeatable Read gives you the old Snapshot Isolation level that we have always had. Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and Anssi Kääriäinen 2011-02-07 22:46:51 +01:00
			`/* conflict detection (may also trigger rollback) */`
Remove pointless const qualifiers from function arguments in the SSI code. As Tom Lane pointed out, "const Relation foo" doesn't guarantee that you can't modify the data the "foo" pointer points to. It just means that you can't change the pointer to point to something else within the function, which is not very useful. 2011-06-22 11:18:39 +02:00			`extern void CheckForSerializableConflictOut(bool valid, Relation relation, HeapTuple tuple,`
			`Buffer buffer, Snapshot snapshot);`
			`extern void CheckForSerializableConflictIn(Relation relation, HeapTuple tuple, Buffer buffer);`
			`extern void CheckTableForSerializableConflictIn(Relation relation);`
Implement genuine serializable isolation level. Until now, our Serializable mode has in fact been what's called Snapshot Isolation, which allows some anomalies that could not occur in any serialized ordering of the transactions. This patch fixes that using a method called Serializable Snapshot Isolation, based on research papers by Michael J. Cahill (see README-SSI for full references). In Serializable Snapshot Isolation, transactions run like they do in Snapshot Isolation, but a predicate lock manager observes the reads and writes performed and aborts transactions if it detects that an anomaly might occur. This method produces some false positives, ie. it sometimes aborts transactions even though there is no anomaly. To track reads we implement predicate locking, see storage/lmgr/predicate.c. Whenever a tuple is read, a predicate lock is acquired on the tuple. Shared memory is finite, so when a transaction takes many tuple-level locks on a page, the locks are promoted to a single page-level lock, and further to a single relation level lock if necessary. To lock key values with no matching tuple, a sequential scan always takes a relation-level lock, and an index scan acquires a page-level lock that covers the search key, whether or not there are any matching keys at the moment. A predicate lock doesn't conflict with any regular locks or with another predicate locks in the normal sense. They're only used by the predicate lock manager to detect the danger of anomalies. Only serializable transactions participate in predicate locking, so there should be no extra overhead for for other transactions. Predicate locks can't be released at commit, but must be remembered until all the transactions that overlapped with it have completed. That means that we need to remember an unbounded amount of predicate locks, so we apply a lossy but conservative method of tracking locks for committed transactions. If we run short of shared memory, we overflow to a new "pg_serial" SLRU pool. We don't currently allow Serializable transactions in Hot Standby mode. That would be hard, because even read-only transactions can cause anomalies that wouldn't otherwise occur. Serializable isolation mode now means the new fully serializable level. Repeatable Read gives you the old Snapshot Isolation level that we have always had. Kevin Grittner and Dan Ports, reviewed by Jeff Davis, Heikki Linnakangas and Anssi Kääriäinen 2011-02-07 22:46:51 +01:00
			`/* final rollback checking */`
			`extern void PreCommit_CheckForSerializationFailure(void);`

			`/* two-phase commit support */`
			`extern void AtPrepare_PredicateLocks(void);`
			`extern void PostPrepare_PredicateLocks(TransactionId xid);`
			`extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);`
			`extern void predicatelock_twophase_recover(TransactionId xid, uint16 info,`
			`void *recdata, uint32 len);`

			`#endif /* PREDICATE_H */`