mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-10-03 13:16:53 +02:00
2584029e31
backend flowchart.
436 lines
11 KiB
C
436 lines
11 KiB
C
/*-------------------------------------------------------------------------
 *
 * multi.c--
 *	  multi level lock table manager
 *
 *	  Standard multi-level lock manager as per the Gray paper
 *	  (at least, that is what it is supposed to be).  We implement
 *	  three levels -- RELN, PAGE, TUPLE.  A "tuple" here is actually a
 *	  TID, i.e. a physical record pointer.  It isn't an object id.
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.19 1998/06/30 02:33:31 momjian Exp $
 *
 * NOTES:
 *	 (1) The lock.c module assumes that the caller here is doing
 *		 two phase locking.
 *
 *-------------------------------------------------------------------------
 */
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include "postgres.h"
|
|
#include "storage/lmgr.h"
|
|
#include "storage/multilev.h"
|
|
|
|
#include "utils/rel.h"
|
|
#include "miscadmin.h" /* MyDatabaseId */
|
|
|
|
static bool
|
|
MultiAcquire(LOCKMETHOD lockmethod, LOCKTAG *tag, LOCKMODE lockmode,
|
|
PG_LOCK_LEVEL level);
|
|
static bool
|
|
MultiRelease(LOCKMETHOD lockmethod, LOCKTAG *tag, LOCKMODE lockmode,
|
|
PG_LOCK_LEVEL level);
|
|
|
|
/*
|
|
* INTENT indicates to higher level that a lower level lock has been
|
|
* set. For example, a write lock on a tuple conflicts with a write
|
|
* lock on a relation. This conflict is detected as a WRITE_INTENT/
|
|
* WRITE conflict between the tuple's intent lock and the relation's
|
|
* write lock.
|
|
*/
|
|
static int MultiConflicts[] = {
|
|
(int) NULL,
|
|
/* All reads and writes at any level conflict with a write lock */
|
|
(1 << WRITE_LOCK) | (1 << WRITE_INTENT) | (1 << READ_LOCK) | (1 << READ_INTENT),
|
|
/* read locks conflict with write locks at curr and lower levels */
|
|
(1 << WRITE_LOCK) | (1 << WRITE_INTENT),
|
|
/* write intent locks */
|
|
(1 << READ_LOCK) | (1 << WRITE_LOCK),
|
|
/* read intent locks */
|
|
(1 << WRITE_LOCK),
|
|
|
|
/*
|
|
* extend locks for archive storage manager conflict only w/extend
|
|
* locks
|
|
*/
|
|
(1 << EXTEND_LOCK)
|
|
};
|
|
|
|
/*
 * write locks have higher priority than read locks and extend locks.  May
 * want to treat INTENT locks differently.
 *
 * One entry per slot of MultiConflicts, in the same order (presumably:
 * placeholder, write, read, write intent, read intent, extend).  The
 * placeholder is a plain integer 0 rather than a cast null-pointer
 * constant, because this is an array of int.
 */
static int	MultiPrios[] = {
	0,
	2,
	1,
	2,
	1,
	1
};
|
|
|
|
/*
|
|
* Lock table identifier for this lock table. The multi-level
|
|
* lock table is ONE lock table, not three.
|
|
*/
|
|
LOCKMETHOD MultiTableId = (LOCKMETHOD) NULL;
|
|
#ifdef NOT_USED
|
|
LOCKMETHOD ShortTermTableId = (LOCKMETHOD) NULL;
|
|
#endif
|
|
|
|
/*
|
|
* Create the lock table described by MultiConflicts and Multiprio.
|
|
*/
|
|
LOCKMETHOD
|
|
InitMultiLevelLocks()
|
|
{
|
|
int lockmethod;
|
|
|
|
lockmethod = LockMethodTableInit("MultiLevelLockTable", MultiConflicts, MultiPrios, 5);
|
|
MultiTableId = lockmethod;
|
|
if (!(MultiTableId))
|
|
elog(ERROR, "InitMultiLocks: couldnt initialize lock table");
|
|
/* -----------------------
|
|
* No short term lock table for now. -Jeff 15 July 1991
|
|
*
|
|
* ShortTermTableId = LockTableRename(lockmethod);
|
|
* if (! (ShortTermTableId)) {
|
|
* elog(ERROR,"InitMultiLocks: couldnt rename lock table");
|
|
* }
|
|
* -----------------------
|
|
*/
|
|
return MultiTableId;
|
|
}
|
|
|
|
/*
|
|
* MultiLockReln -- lock a relation
|
|
*
|
|
* Returns: TRUE if the lock can be set, FALSE otherwise.
|
|
*/
|
|
bool
|
|
MultiLockReln(LockInfo linfo, LOCKMODE lockmode)
|
|
{
|
|
LOCKTAG tag;
|
|
|
|
/*
|
|
* LOCKTAG has two bytes of padding, unfortunately. The hash function
|
|
* will return miss if the padding bytes aren't zero'd.
|
|
*/
|
|
MemSet(&tag, 0, sizeof(tag));
|
|
tag.relId = linfo->lRelId.relId;
|
|
tag.dbId = linfo->lRelId.dbId;
|
|
return (MultiAcquire(MultiTableId, &tag, lockmode, RELN_LEVEL));
|
|
}
|
|
|
|
/*
|
|
* MultiLockTuple -- Lock the TID associated with a tuple
|
|
*
|
|
* Returns: TRUE if lock is set, FALSE otherwise.
|
|
*
|
|
* Side Effects: causes intention level locks to be set
|
|
* at the page and relation level.
|
|
*/
|
|
bool
|
|
MultiLockTuple(LockInfo linfo, ItemPointer tidPtr, LOCKMODE lockmode)
|
|
{
|
|
LOCKTAG tag;
|
|
|
|
/*
|
|
* LOCKTAG has two bytes of padding, unfortunately. The hash function
|
|
* will return miss if the padding bytes aren't zero'd.
|
|
*/
|
|
MemSet(&tag, 0, sizeof(tag));
|
|
|
|
tag.relId = linfo->lRelId.relId;
|
|
tag.dbId = linfo->lRelId.dbId;
|
|
|
|
/* not locking any valid Tuple, just the page */
|
|
tag.tupleId = *tidPtr;
|
|
return (MultiAcquire(MultiTableId, &tag, lockmode, TUPLE_LEVEL));
|
|
}
|
|
|
|
/*
|
|
* same as above at page level
|
|
*/
|
|
bool
|
|
MultiLockPage(LockInfo linfo, ItemPointer tidPtr, LOCKMODE lockmode)
|
|
{
|
|
LOCKTAG tag;
|
|
|
|
/*
|
|
* LOCKTAG has two bytes of padding, unfortunately. The hash function
|
|
* will return miss if the padding bytes aren't zero'd.
|
|
*/
|
|
MemSet(&tag, 0, sizeof(tag));
|
|
|
|
|
|
/* ----------------------------
|
|
* Now we want to set the page offset to be invalid
|
|
* and lock the block. There is some confusion here as to what
|
|
* a page is. In Postgres a page is an 8k block, however this
|
|
* block may be partitioned into many subpages which are sometimes
|
|
* also called pages. The term is overloaded, so don't be fooled
|
|
* when we say lock the page we mean the 8k block. -Jeff 16 July 1991
|
|
* ----------------------------
|
|
*/
|
|
tag.relId = linfo->lRelId.relId;
|
|
tag.dbId = linfo->lRelId.dbId;
|
|
BlockIdCopy(&(tag.tupleId.ip_blkid), &(tidPtr->ip_blkid));
|
|
return (MultiAcquire(MultiTableId, &tag, lockmode, PAGE_LEVEL));
|
|
}
|
|
|
|
/*
|
|
* MultiAcquire -- acquire multi level lock at requested level
|
|
*
|
|
* Returns: TRUE if lock is set, FALSE if not
|
|
* Side Effects:
|
|
*/
|
|
static bool
|
|
MultiAcquire(LOCKMETHOD lockmethod,
|
|
LOCKTAG *tag,
|
|
LOCKMODE lockmode,
|
|
PG_LOCK_LEVEL level)
|
|
{
|
|
LOCKMODE locks[N_LEVELS];
|
|
int i,
|
|
status;
|
|
LOCKTAG xxTag,
|
|
*tmpTag = &xxTag;
|
|
int retStatus = TRUE;
|
|
|
|
/*
|
|
* Three levels implemented. If we set a low level (e.g. Tuple) lock,
|
|
* we must set INTENT locks on the higher levels. The intent lock
|
|
* detects conflicts between the low level lock and an existing high
|
|
* level lock. For example, setting a write lock on a tuple in a
|
|
* relation is disallowed if there is an existing read lock on the
|
|
* entire relation. The write lock would set a WRITE + INTENT lock on
|
|
* the relation and that lock would conflict with the read.
|
|
*/
|
|
switch (level)
|
|
{
|
|
case RELN_LEVEL:
|
|
locks[0] = lockmode;
|
|
locks[1] = NO_LOCK;
|
|
locks[2] = NO_LOCK;
|
|
break;
|
|
case PAGE_LEVEL:
|
|
locks[0] = lockmode + INTENT;
|
|
locks[1] = lockmode;
|
|
locks[2] = NO_LOCK;
|
|
break;
|
|
case TUPLE_LEVEL:
|
|
locks[0] = lockmode + INTENT;
|
|
locks[1] = lockmode + INTENT;
|
|
locks[2] = lockmode;
|
|
break;
|
|
default:
|
|
elog(ERROR, "MultiAcquire: bad lock level");
|
|
return (FALSE);
|
|
}
|
|
|
|
/*
|
|
* construct a new tag as we go. Always loop through all levels, but
|
|
* if we arent' seting a low level lock, locks[i] is set to NO_LOCK
|
|
* for the lower levels. Always start from the highest level and go
|
|
* to the lowest level.
|
|
*/
|
|
MemSet(tmpTag, 0, sizeof(*tmpTag));
|
|
tmpTag->relId = tag->relId;
|
|
tmpTag->dbId = tag->dbId;
|
|
|
|
for (i = 0; i < N_LEVELS; i++)
|
|
{
|
|
if (locks[i] != NO_LOCK)
|
|
{
|
|
switch (i)
|
|
{
|
|
case RELN_LEVEL:
|
|
/* -------------
|
|
* Set the block # and offset to invalid
|
|
* -------------
|
|
*/
|
|
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
|
|
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
|
break;
|
|
case PAGE_LEVEL:
|
|
/* -------------
|
|
* Copy the block #, set the offset to invalid
|
|
* -------------
|
|
*/
|
|
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
|
|
&(tag->tupleId.ip_blkid));
|
|
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
|
break;
|
|
case TUPLE_LEVEL:
|
|
/* --------------
|
|
* Copy the entire tuple id.
|
|
* --------------
|
|
*/
|
|
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
|
|
break;
|
|
}
|
|
|
|
status = LockAcquire(lockmethod, tmpTag, locks[i]);
|
|
if (!status)
|
|
{
|
|
|
|
/*
|
|
* failed for some reason. Before returning we have to
|
|
* release all of the locks we just acquired.
|
|
* MultiRelease(xx,xx,xx, i) means release starting from
|
|
* the last level lock we successfully acquired
|
|
*/
|
|
retStatus = FALSE;
|
|
MultiRelease(lockmethod, tag, lockmode, i);
|
|
/* now leave the loop. Don't try for any more locks */
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return (retStatus);
|
|
}
|
|
|
|
/* ------------------
 * MultiReleasePage -- release a page in the multi-level lock table
 *
 * Builds the same tag MultiLockPage() builds (relation and database ids
 * from 'linfo', block id from '*tidPtr') and passes it to MultiRelease()
 * at PAGE_LEVEL.  Returns whatever MultiRelease() reports.
 * ------------------
 */
#ifdef NOT_USED
bool
MultiReleasePage(LockInfo linfo, ItemPointer tidPtr, LOCKMODE lockmode)
{
	LOCKTAG		pageTag;
	bool		released;

	/* ------------------
	 * Clear the whole tag first: LOCKTAG has two bytes of padding and
	 * the hash function will miss unless those bytes are zero.
	 * ------------------
	 */
	MemSet(&pageTag, 0, sizeof(LOCKTAG));

	pageTag.dbId = linfo->lRelId.dbId;
	pageTag.relId = linfo->lRelId.relId;
	BlockIdCopy(&(pageTag.tupleId.ip_blkid), &(tidPtr->ip_blkid));

	released = MultiRelease(MultiTableId, &pageTag, lockmode, PAGE_LEVEL);
	return released;
}

#endif
|
|
|
|
/* ------------------
|
|
* Release a relation in the multi-level lock table
|
|
* ------------------
|
|
*/
|
|
bool
|
|
MultiReleaseReln(LockInfo linfo, LOCKMODE lockmode)
|
|
{
|
|
LOCKTAG tag;
|
|
|
|
/* ------------------
|
|
* LOCKTAG has two bytes of padding, unfortunately. The
|
|
* hash function will return miss if the padding bytes aren't
|
|
* zero'd.
|
|
* ------------------
|
|
*/
|
|
MemSet(&tag, 0, sizeof(LOCKTAG));
|
|
tag.relId = linfo->lRelId.relId;
|
|
tag.dbId = linfo->lRelId.dbId;
|
|
|
|
return (MultiRelease(MultiTableId, &tag, lockmode, RELN_LEVEL));
|
|
}
|
|
|
|
/*
|
|
* MultiRelease -- release a multi-level lock
|
|
*
|
|
* Returns: TRUE if successful, FALSE otherwise.
|
|
*/
|
|
static bool
|
|
MultiRelease(LOCKMETHOD lockmethod,
|
|
LOCKTAG *tag,
|
|
LOCKMODE lockmode,
|
|
PG_LOCK_LEVEL level)
|
|
{
|
|
LOCKMODE locks[N_LEVELS];
|
|
int i,
|
|
status;
|
|
LOCKTAG xxTag,
|
|
*tmpTag = &xxTag;
|
|
|
|
/*
|
|
* same level scheme as MultiAcquire().
|
|
*/
|
|
switch (level)
|
|
{
|
|
case RELN_LEVEL:
|
|
locks[0] = lockmode;
|
|
locks[1] = NO_LOCK;
|
|
locks[2] = NO_LOCK;
|
|
break;
|
|
case PAGE_LEVEL:
|
|
locks[0] = lockmode + INTENT;
|
|
locks[1] = lockmode;
|
|
locks[2] = NO_LOCK;
|
|
break;
|
|
case TUPLE_LEVEL:
|
|
locks[0] = lockmode + INTENT;
|
|
locks[1] = lockmode + INTENT;
|
|
locks[2] = lockmode;
|
|
break;
|
|
default:
|
|
elog(ERROR, "MultiRelease: bad lockmode");
|
|
}
|
|
|
|
/*
|
|
* again, construct the tag on the fly. This time, however, we
|
|
* release the locks in the REVERSE order -- from lowest level to
|
|
* highest level.
|
|
*
|
|
* Must zero out the tag to set padding byes to zero and ensure hashing
|
|
* consistency.
|
|
*/
|
|
MemSet(tmpTag, 0, sizeof(*tmpTag));
|
|
tmpTag->relId = tag->relId;
|
|
tmpTag->dbId = tag->dbId;
|
|
|
|
for (i = (N_LEVELS - 1); i >= 0; i--)
|
|
{
|
|
if (locks[i] != NO_LOCK)
|
|
{
|
|
switch (i)
|
|
{
|
|
case RELN_LEVEL:
|
|
/* -------------
|
|
* Set the block # and offset to invalid
|
|
* -------------
|
|
*/
|
|
BlockIdSet(&(tmpTag->tupleId.ip_blkid), InvalidBlockNumber);
|
|
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
|
break;
|
|
case PAGE_LEVEL:
|
|
/* -------------
|
|
* Copy the block #, set the offset to invalid
|
|
* -------------
|
|
*/
|
|
BlockIdCopy(&(tmpTag->tupleId.ip_blkid),
|
|
&(tag->tupleId.ip_blkid));
|
|
tmpTag->tupleId.ip_posid = InvalidOffsetNumber;
|
|
break;
|
|
case TUPLE_LEVEL:
|
|
ItemPointerCopy(&tmpTag->tupleId, &tag->tupleId);
|
|
break;
|
|
}
|
|
status = LockRelease(lockmethod, tmpTag, locks[i]);
|
|
if (!status)
|
|
elog(ERROR, "MultiRelease: couldn't release after error");
|
|
}
|
|
}
|
|
/* shouldn't reach here */
|
|
return false;
|
|
}
|