diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index ffdee8388b..0898df6233 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.17 2005/11/22 18:17:07 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.18 2005/12/11 21:02:17 tgl Exp $ * * NOTES * Each global transaction is associated with a global transaction @@ -284,7 +284,8 @@ MarkAsPreparing(TransactionId xid, const char *gid, gxact->proc.lwWaitLink = NULL; gxact->proc.waitLock = NULL; gxact->proc.waitProcLock = NULL; - SHMQueueInit(&(gxact->proc.procLocks)); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + SHMQueueInit(&(gxact->proc.myProcLocks[i])); /* subxid data must be filled later by GXactLoadSubxactData */ gxact->proc.subxids.overflowed = false; gxact->proc.subxids.nxids = 0; diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 7ac8084f6a..cafadeb905 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -14,8 +14,8 @@ * * The process array now also includes PGPROC structures representing * prepared transactions. The xid and subxids fields of these are valid, - * as is the procLocks list. They can be distinguished from regular backend - * PGPROCs at need by checking for pid == 0. + * as are the myProcLocks lists. They can be distinguished from regular + * backend PGPROCs at need by checking for pid == 0. * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group @@ -23,7 +23,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.8 2005/11/22 18:17:20 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.9 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index 25820f4b73..fdda5bf82a 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -1,4 +1,4 @@ -$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.18 2005/12/09 01:22:04 tgl Exp $ +$PostgreSQL: pgsql/src/backend/storage/lmgr/README,v 1.19 2005/12/11 21:02:18 tgl Exp $ LOCKING OVERVIEW @@ -50,9 +50,12 @@ LOCK DATA STRUCTURES Lock methods describe the overall locking behavior. Currently there are two lock methods: DEFAULT and USER. (USER locks are non-blocking.) -Lock modes describe the type of the lock (read/write or shared/exclusive). -See src/tools/backend/index.html and src/include/storage/lock.h for more -details. +Lock modes describe the type of the lock (read/write or shared/exclusive). +In principle, each lock method can have its own set of lock modes with +different conflict rules, but currently DEFAULT and USER methods use +identical lock mode sets. See src/tools/backend/index.html and +src/include/storage/lock.h for more details. (Lock modes are also called +lock types in some places in the code and documentation.) There are two fundamental lock structures in shared memory: the per-lockable-object LOCK struct, and the per-lock-and-requestor PROCLOCK @@ -67,7 +70,7 @@ be made per lockable object/lock mode/backend. 
Internally to a backend, however, the same lock may be requested and perhaps released multiple times in a transaction, and it can also be held both transactionally and session- wide. The internal request counts are held in LOCALLOCK so that the shared -LockMgrLock need not be obtained to alter them. +data structures need not be accessed to alter them. --------------------------------------------------------------------------- @@ -103,10 +106,10 @@ procLocks - be waiting for more!). waitProcs - - This is a shared memory queue of all process structures corresponding to - a backend that is waiting (sleeping) until another backend releases this + This is a shared memory queue of all PGPROC structures corresponding to + backends that are waiting (sleeping) until another backend releases this lock. The process structure holds the information needed to determine - if it should be woken up when this lock is released. + if it should be woken up when the lock is released. nRequested - Keeps a count of how many times this lock has been attempted to be @@ -131,12 +134,12 @@ nGranted - granted - Keeps count of how many locks of each type are currently held. Once again only elements 1 through MAX_LOCKMODES-1 are used (0 is not). Also, like - requested, summing the values of granted should total to the value + requested[], summing the values of granted[] should total to the value of nGranted. We should always have 0 <= nGranted <= nRequested, and -0 <= granted[i] <= requested[i] for each i. If the request counts go to -zero, the lock object is no longer needed and can be freed. +0 <= granted[i] <= requested[i] for each i. When all the request counts +go to zero, the LOCK object is no longer needed and can be freed. --------------------------------------------------------------------------- @@ -154,15 +157,16 @@ tag - SHMEM offset of PGPROC of backend process that owns this PROCLOCK. holdMask - - A bitmask for the lock types successfully acquired by this PROCLOCK. + A bitmask for the lock modes successfully acquired by this PROCLOCK. This should be a subset of the LOCK object's grantMask, and also a - subset of the PGPROC object's heldLocks mask. + subset of the PGPROC object's heldLocks mask (if the PGPROC is + currently waiting for another lock mode on this lock). releaseMask - - A bitmask for the lock types due to be released during LockReleaseAll. + A bitmask for the lock modes due to be released during LockReleaseAll. This must be a subset of the holdMask. Note that it is modified without - taking the LockMgrLock, and therefore it is unsafe for any backend except - the one owning the PROCLOCK to examine/change it. + taking the partition LWLock, and therefore it is unsafe for any + backend except the one owning the PROCLOCK to examine/change it. lockLink - List link for shared memory queue of all the PROCLOCK objects for the @@ -174,7 +178,60 @@ procLink - --------------------------------------------------------------------------- -The deadlock detection algorithm: + +LOCK MANAGER INTERNAL LOCKING + +Before PostgreSQL 8.2, all of the shared-memory data structures used by +the lock manager were protected by a single LWLock, the LockMgrLock; +any operation involving these data structures had to exclusively lock +LockMgrLock. Not too surprisingly, this became a contention bottleneck. +To reduce contention, the lock manager's data structures have been split +into multiple "partitions", each protected by an independent LWLock. +Most operations only need to lock the single partition they are working in. 
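(Editor's aside before the details that follow: the single-partition access
pattern described above, written out as a minimal sketch.  This is not patch
code; the function name lock_object_exists is hypothetical and the sketch
assumes it lives in lock.c, where the static hash-table pointers are visible,
while LockTagToPartition, FirstLockMgrLock, LockMethodLockHash, hash_search,
and LWLockAcquire/LWLockRelease are identifiers this patch itself introduces
or already uses.)

/*
 * Hypothetical illustration: probing the shared lock table touches only
 * the one partition the LOCKTAG hashes to, so backends working on locks
 * in different partitions no longer contend on a single LockMgrLock.
 */
static bool
lock_object_exists(const LOCKTAG *locktag)
{
	int			partition = LockTagToPartition(locktag);
	LWLockId	partitionLock = FirstLockMgrLock + partition;
	bool		found;

	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
	(void) hash_search(LockMethodLockHash[partition],
					   (void *) locktag,
					   HASH_FIND, &found);
	LWLockRelease(partitionLock);

	return found;
}
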
+Here are the details: + +* Each possible lock is assigned to one partition according to a hash of +its LOCKTAG value (see LockTagToPartition()). The partition's LWLock is +considered to protect all the LOCK objects of that partition as well as +their subsidiary PROCLOCKs. The shared-memory hash tables for LOCKs and +PROCLOCKs are divided into separate hash tables for each partition, and +operations on each hash table are likewise protected by the partition +lock. + +* Formerly, each PGPROC had a single list of PROCLOCKs belonging to it. +This has now been split into per-partition lists, so that access to a +particular PROCLOCK list can be protected by the associated partition's +LWLock. (This is not strictly necessary at the moment, because at this +writing a PGPROC's PROCLOCK list is only accessed by the owning backend +anyway. But it seems forward-looking to maintain a convention for how +other backends could access it. In any case LockReleaseAll needs to be +able to quickly determine which partition each LOCK belongs to, and +for the currently contemplated number of partitions, this way takes less +shared memory than explicitly storing a partition number in LOCK structs +would require.) + +* The other lock-related fields of a PGPROC are only interesting when +the PGPROC is waiting for a lock, so we consider that they are protected +by the partition LWLock of the awaited lock. + +For normal lock acquisition and release, it is sufficient to lock the +partition containing the desired lock. Deadlock checking needs to touch +multiple partitions in general; for simplicity, we just make it lock all +the partitions in partition-number order. (To prevent LWLock deadlock, +we establish the rule that any backend needing to lock more than one +partition at once must lock them in partition-number order.) It's +possible that deadlock checking could be done without touching every +partition in typical cases, but since in a properly functioning system +deadlock checking should not occur often enough to be performance-critical, +trying to make this work does not seem a productive use of effort. + +A backend's internal LOCALLOCK hash table is not partitioned. We do store +the partition number in LOCALLOCK table entries, but this is a straight +speed-for-space tradeoff: we could instead recalculate the partition +number from the LOCKTAG when needed. + + +THE DEADLOCK DETECTION ALGORITHM Since we allow user transactions to request locks in any order, deadlock is possible. We use a deadlock detection/breaking algorithm that is diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c index adbd373bb7..e72ab00b5b 100644 --- a/src/backend/storage/lmgr/deadlock.c +++ b/src/backend/storage/lmgr/deadlock.c @@ -12,7 +12,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.37 2005/12/09 01:22:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.38 2005/12/11 21:02:18 tgl Exp $ * * Interface: * @@ -53,9 +53,9 @@ typedef struct * Information saved about each edge in a detected deadlock cycle. This * is used to print a diagnostic message upon failure. * - * Note: because we want to examine this info after releasing the LockMgrLock, - * we can't just store LOCK and PGPROC pointers; we must extract out all the - * info we want to be able to print. 
+ * Note: because we want to examine this info after releasing the lock + * manager's partition locks, we can't just store LOCK and PGPROC pointers; + * we must extract out all the info we want to be able to print. */ typedef struct { @@ -188,19 +188,11 @@ InitDeadLockChecking(void) * deadlock. If resolution is impossible, return TRUE --- the caller * is then expected to abort the given proc's transaction. * - * We can't block on user locks, so no sense testing for deadlock - * because there is no blocking, and no timer for the block. So, - * only look at regular locks. - * - * We must have already locked the master lock before being called. - * NOTE: although the lockmethod structure appears to allow each lock - * table to have a different masterLock, all locks that can block had - * better use the same LWLock, else this code will not be adequately - * interlocked! + * Caller must already have locked all partitions of the lock tables. * * On failure, deadlock details are recorded in deadlockDetails[] for * subsequent printing by DeadLockReport(). That activity is separate - * because we don't want to do it while holding the master lock. + * because we don't want to do it while holding all those LWLocks. */ bool DeadLockCheck(PGPROC *proc) diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 344d677cd2..7f42b477cc 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -1,14 +1,14 @@ /*------------------------------------------------------------------------- * * lock.c - * POSTGRES low-level lock mechanism + * POSTGRES primary lock mechanism * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.161 2005/12/09 01:22:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.162 2005/12/11 21:02:18 tgl Exp $ * * NOTES * A lock table is a shared memory hash table. When @@ -163,10 +163,13 @@ typedef struct TwoPhaseLockRecord /* - * Links to hash tables containing lock state + * Pointers to hash tables containing lock state + * + * The LockMethodLockHash and LockMethodProcLockHash hash tables are in + * shared memory; LockMethodLocalHash is local to each backend. */ -static HTAB *LockMethodLockHash; -static HTAB *LockMethodProcLockHash; +static HTAB *LockMethodLockHash[NUM_LOCK_PARTITIONS]; +static HTAB *LockMethodProcLockHash[NUM_LOCK_PARTITIONS]; static HTAB *LockMethodLocalHash; @@ -255,16 +258,25 @@ PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP) static void RemoveLocalLock(LOCALLOCK *locallock); static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner); -static void WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock, - ResourceOwner owner); +static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner); static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode, PROCLOCK *proclock, LockMethod lockMethodTable); -static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, - PROCLOCK *proclock, bool wakeupNeeded); +static void CleanUpLock(LOCK *lock, PROCLOCK *proclock, + LockMethod lockMethodTable, int partition, + bool wakeupNeeded); /* - * InitLocks -- Initialize the lock module's shared memory. + * InitLocks -- Initialize the lock manager's data structures. + * + * This is called from CreateSharedMemoryAndSemaphores(), which see for + * more comments. 
In the normal postmaster case, the shared hash tables + * are created here, as well as a locallock hash table that will remain + * unused and empty in the postmaster itself. Backends inherit the pointers + * to the shared tables via fork(), and also inherit an image of the locallock + * hash table, which they proceed to use. In the EXEC_BACKEND case, each + * backend re-executes this code to obtain pointers to the already existing + * shared hash tables and to create its locallock hash table. */ void InitLocks(void) @@ -274,13 +286,18 @@ InitLocks(void) int hash_flags; long init_table_size, max_table_size; + int i; - /* Compute init/max size to request for lock hashtables */ + /* + * Compute init/max size to request for lock hashtables. Note these + * calculations must agree with LockShmemSize! + */ max_table_size = NLOCKENTS(); + max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1; init_table_size = max_table_size / 2; /* - * allocate a hash table for LOCK structs. This is used to store + * Allocate hash tables for LOCK structs. These are used to store * per-locked-object information. */ MemSet(&info, 0, sizeof(info)); @@ -289,37 +306,45 @@ InitLocks(void) info.hash = tag_hash; hash_flags = (HASH_ELEM | HASH_FUNCTION); - sprintf(shmemName, "LOCK hash"); - LockMethodLockHash = ShmemInitHash(shmemName, - init_table_size, - max_table_size, - &info, - hash_flags); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + { + sprintf(shmemName, "LOCK hash %d", i); + LockMethodLockHash[i] = ShmemInitHash(shmemName, + init_table_size, + max_table_size, + &info, + hash_flags); + if (!LockMethodLockHash[i]) + elog(FATAL, "could not initialize lock table \"%s\"", shmemName); + } - if (!LockMethodLockHash) - elog(FATAL, "could not initialize lock table \"%s\"", shmemName); + /* Assume an average of 2 holders per lock */ + max_table_size *= 2; + init_table_size *= 2; /* - * allocate a hash table for PROCLOCK structs. This is used to store - * per-lock-holder information. + * Allocate hash tables for PROCLOCK structs. These are used to store + * per-lock-per-holder information. */ info.keysize = sizeof(PROCLOCKTAG); info.entrysize = sizeof(PROCLOCK); info.hash = tag_hash; hash_flags = (HASH_ELEM | HASH_FUNCTION); - sprintf(shmemName, "PROCLOCK hash"); - LockMethodProcLockHash = ShmemInitHash(shmemName, - init_table_size, - max_table_size, - &info, - hash_flags); - - if (!LockMethodProcLockHash) - elog(FATAL, "could not initialize lock table \"%s\"", shmemName); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + { + sprintf(shmemName, "PROCLOCK hash %d", i); + LockMethodProcLockHash[i] = ShmemInitHash(shmemName, + init_table_size, + max_table_size, + &info, + hash_flags); + if (!LockMethodProcLockHash[i]) + elog(FATAL, "could not initialize lock table \"%s\"", shmemName); + } /* - * allocate a non-shared hash table for LOCALLOCK structs. This is used + * Allocate one non-shared hash table for LOCALLOCK structs. This is used * to store lock counts and resource owner information. * * The non-shared table could already exist in this process (this occurs @@ -355,6 +380,39 @@ GetLocksMethodTable(const LOCK *lock) } +/* + * Given a LOCKTAG, determine which partition the lock belongs in. + * + * Basically what we want to do here is hash the locktag. 
However, it + * seems unwise to use hash_any() because that is the same function that + * will be used to distribute the locks within each partition's hash table; + * if we use it, we run a big risk of having uneven distribution of hash + * codes within each hash table. Instead, we use a simple linear XOR of the + * bits of the locktag. + */ +int +LockTagToPartition(const LOCKTAG *locktag) +{ + const uint8 *ptr = (const uint8 *) locktag; + int result = 0; + int i; + + for (i = 0; i < sizeof(LOCKTAG); i++) + result ^= *ptr++; +#if NUM_LOCK_PARTITIONS == 16 + result ^= result >> 4; + result &= 0x0F; +#elif NUM_LOCK_PARTITIONS == 4 + result ^= result >> 4; + result ^= result >> 2; + result &= 0x03; +#else +#error unsupported NUM_LOCK_PARTITIONS +#endif + return result; +} + + /* * LockAcquire -- Check for lock conflicts, sleep if conflict found, * set lock if/when no conflicts. @@ -397,7 +455,8 @@ LockAcquire(const LOCKTAG *locktag, PROCLOCKTAG proclocktag; bool found; ResourceOwner owner; - LWLockId masterLock; + int partition; + LWLockId partitionLock; int status; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -438,6 +497,7 @@ LockAcquire(const LOCKTAG *locktag, locallock->lock = NULL; locallock->proclock = NULL; locallock->isTempObject = isTempObject; + locallock->partition = LockTagToPartition(&(localtag.lock)); locallock->nLocks = 0; locallock->numLockOwners = 0; locallock->maxLockOwners = 8; @@ -474,9 +534,10 @@ LockAcquire(const LOCKTAG *locktag, /* * Otherwise we've got to mess with the shared lock table. */ - masterLock = LockMgrLock; + partition = locallock->partition; + partitionLock = FirstLockMgrLock + partition; - LWLockAcquire(masterLock, LW_EXCLUSIVE); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * Find or create a lock with this tag. @@ -486,12 +547,12 @@ LockAcquire(const LOCKTAG *locktag, * pointer is valid, since a lock object with no locks can go away * anytime. */ - lock = (LOCK *) hash_search(LockMethodLockHash, + lock = (LOCK *) hash_search(LockMethodLockHash[partition], (void *) locktag, HASH_ENTER_NULL, &found); if (!lock) { - LWLockRelease(masterLock); + LWLockRelease(partitionLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"), @@ -532,7 +593,7 @@ LockAcquire(const LOCKTAG *locktag, /* * Find or create a proclock entry with this tag */ - proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, + proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition], (void *) &proclocktag, HASH_ENTER_NULL, &found); if (!proclock) @@ -547,12 +608,12 @@ LockAcquire(const LOCKTAG *locktag, * anyone to release the lock object later. 
*/ Assert(SHMQueueEmpty(&(lock->procLocks))); - if (!hash_search(LockMethodLockHash, + if (!hash_search(LockMethodLockHash[partition], (void *) &(lock->tag), HASH_REMOVE, NULL)) elog(PANIC, "lock table corrupted"); } - LWLockRelease(masterLock); + LWLockRelease(partitionLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"), @@ -569,7 +630,8 @@ LockAcquire(const LOCKTAG *locktag, proclock->releaseMask = 0; /* Add proclock to appropriate lists */ SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink); - SHMQueueInsertBefore(&MyProc->procLocks, &proclock->procLink); + SHMQueueInsertBefore(&(MyProc->myProcLocks[partition]), + &proclock->procLink); PROCLOCK_PRINT("LockAcquire: new", proclock); } else @@ -666,7 +728,7 @@ LockAcquire(const LOCKTAG *locktag, { SHMQueueDelete(&proclock->lockLink); SHMQueueDelete(&proclock->procLink); - if (!hash_search(LockMethodProcLockHash, + if (!hash_search(LockMethodProcLockHash[partition], (void *) &(proclock->tag), HASH_REMOVE, NULL)) elog(PANIC, "proclock table corrupted"); @@ -678,7 +740,7 @@ LockAcquire(const LOCKTAG *locktag, LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode); Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0)); Assert(lock->nGranted <= lock->nRequested); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); if (locallock->nLocks == 0) RemoveLocalLock(locallock); return LOCKACQUIRE_NOT_AVAIL; @@ -692,7 +754,7 @@ LockAcquire(const LOCKTAG *locktag, /* * Sleep till someone wakes me up. */ - WaitOnLock(lockmethodid, locallock, owner); + WaitOnLock(locallock, owner); /* * NOTE: do not do any material change of state between here and @@ -709,14 +771,14 @@ LockAcquire(const LOCKTAG *locktag, PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock); LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode); /* Should we retry ? */ - LWLockRelease(masterLock); + LWLockRelease(partitionLock); elog(ERROR, "LockAcquire failed"); } PROCLOCK_PRINT("LockAcquire: granted", proclock); LOCK_PRINT("LockAcquire: granted", lock, lockmode); } - LWLockRelease(masterLock); + LWLockRelease(partitionLock); return LOCKACQUIRE_OK; } @@ -894,11 +956,12 @@ UnGrantLock(LOCK *lock, LOCKMODE lockmode, * should be called after UnGrantLock, and wakeupNeeded is the result from * UnGrantLock.) * - * The locktable's masterLock must be held at entry, and will be + * The lock table's partition lock must be held at entry, and will be * held at exit. 
*/ static void -CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock, +CleanUpLock(LOCK *lock, PROCLOCK *proclock, + LockMethod lockMethodTable, int partition, bool wakeupNeeded) { /* @@ -910,7 +973,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock, PROCLOCK_PRINT("CleanUpLock: deleting", proclock); SHMQueueDelete(&proclock->lockLink); SHMQueueDelete(&proclock->procLink); - if (!hash_search(LockMethodProcLockHash, + if (!hash_search(LockMethodProcLockHash[partition], (void *) &(proclock->tag), HASH_REMOVE, NULL)) elog(PANIC, "proclock table corrupted"); @@ -924,7 +987,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock, */ LOCK_PRINT("CleanUpLock: deleting", lock, 0); Assert(SHMQueueEmpty(&(lock->procLocks))); - if (!hash_search(LockMethodLockHash, + if (!hash_search(LockMethodLockHash[partition], (void *) &(lock->tag), HASH_REMOVE, NULL)) elog(PANIC, "lock table corrupted"); @@ -932,7 +995,7 @@ CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock, PROCLOCK *proclock, else if (wakeupNeeded) { /* There are waiters on this lock, so wake them up. */ - ProcLockWakeup(LockMethods[lockmethodid], lock); + ProcLockWakeup(lockMethodTable, lock); } } @@ -988,12 +1051,12 @@ GrantAwaitedLock(void) * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process. * - * The locktable's masterLock must be held at entry. + * The appropriate partition lock must be held at entry. */ static void -WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock, - ResourceOwner owner) +WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner) { + LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock); LockMethod lockMethodTable = LockMethods[lockmethodid]; const char *old_status; char *new_status; @@ -1025,10 +1088,7 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock, * will also happen in the cancel/die case. */ - if (ProcSleep(lockMethodTable, - locallock->tag.mode, - locallock->lock, - locallock->proclock) != STATUS_OK) + if (ProcSleep(locallock, lockMethodTable) != STATUS_OK) { /* * We failed as a result of a deadlock, see CheckDeadLock(). Quit now. @@ -1036,10 +1096,10 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock, awaitedLock = NULL; LOCK_PRINT("WaitOnLock: aborting on lock", locallock->lock, locallock->tag.mode); - LWLockRelease(LockMgrLock); + LWLockRelease(FirstLockMgrLock + locallock->partition); /* - * Now that we aren't holding the LockMgrLock, we can give an error + * Now that we aren't holding the partition lock, we can give an error * report including details about the detected deadlock. */ DeadLockReport(); @@ -1059,12 +1119,12 @@ WaitOnLock(LOCKMETHODID lockmethodid, LOCALLOCK *locallock, * Remove a proc from the wait-queue it is on * (caller must know it is on one). * - * Locktable lock must be held by caller. + * Appropriate partition lock must be held by caller. * * NB: this does not clean up any locallock object that may exist for the lock. */ void -RemoveFromWaitQueue(PGPROC *proc) +RemoveFromWaitQueue(PGPROC *proc, int partition) { LOCK *waitLock = proc->waitLock; PROCLOCK *proclock = proc->waitProcLock; @@ -1102,7 +1162,9 @@ RemoveFromWaitQueue(PGPROC *proc) * LockRelease expects there to be no remaining proclocks.) Then see if * any other waiters for the lock can be woken up now. 
*/ - CleanUpLock(lockmethodid, waitLock, proclock, true); + CleanUpLock(waitLock, proclock, + LockMethods[lockmethodid], partition, + true); } /* @@ -1125,7 +1187,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) LOCALLOCK *locallock; LOCK *lock; PROCLOCK *proclock; - LWLockId masterLock; + int partition; + LWLockId partitionLock; bool wakeupNeeded; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) @@ -1212,9 +1275,10 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) /* * Otherwise we've got to mess with the shared lock table. */ - masterLock = LockMgrLock; + partition = locallock->partition; + partitionLock = FirstLockMgrLock + partition; - LWLockAcquire(masterLock, LW_EXCLUSIVE); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We don't need to re-find the lock or proclock, since we kept their @@ -1233,7 +1297,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) { PROCLOCK_PRINT("LockRelease: WRONGTYPE", proclock); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); elog(WARNING, "you don't own a lock of type %s", lockMethodTable->lockModeNames[lockmode]); RemoveLocalLock(locallock); @@ -1245,9 +1309,11 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) */ wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); - CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded); + CleanUpLock(lock, proclock, + lockMethodTable, partition, + wakeupNeeded); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); RemoveLocalLock(locallock); return TRUE; @@ -1265,14 +1331,13 @@ void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) { HASH_SEQ_STATUS status; - SHM_QUEUE *procLocks = &(MyProc->procLocks); - LWLockId masterLock; LockMethod lockMethodTable; int i, numLockModes; LOCALLOCK *locallock; - PROCLOCK *proclock; LOCK *lock; + PROCLOCK *proclock; + int partition; if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) elog(ERROR, "unrecognized lock method: %d", lockmethodid); @@ -1284,7 +1349,6 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) #endif numLockModes = lockMethodTable->numLockModes; - masterLock = LockMgrLock; /* * First we run through the locallock table and get rid of unwanted @@ -1351,74 +1415,89 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) RemoveLocalLock(locallock); } - LWLockAcquire(masterLock, LW_EXCLUSIVE); - - proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, - offsetof(PROCLOCK, procLink)); - - while (proclock) + /* + * Now, scan each lock partition separately. 
+ */ + for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - bool wakeupNeeded = false; - PROCLOCK *nextplock; + LWLockId partitionLock = FirstLockMgrLock + partition; + SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); - /* Get link first, since we may unlink/delete this proclock */ - nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink, - offsetof(PROCLOCK, procLink)); + proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, + offsetof(PROCLOCK, procLink)); - Assert(proclock->tag.proc == MAKE_OFFSET(MyProc)); + if (!proclock) + continue; /* needn't examine this partition */ - lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); - /* Ignore items that are not of the lockmethod to be removed */ - if (LOCK_LOCKMETHOD(*lock) != lockmethodid) - goto next_item; - - /* - * In allLocks mode, force release of all locks even if locallock - * table had problems - */ - if (allLocks) - proclock->releaseMask = proclock->holdMask; - else - Assert((proclock->releaseMask & ~proclock->holdMask) == 0); - - /* - * Ignore items that have nothing to be released, unless they have - * holdMask == 0 and are therefore recyclable - */ - if (proclock->releaseMask == 0 && proclock->holdMask != 0) - goto next_item; - - PROCLOCK_PRINT("LockReleaseAll", proclock); - LOCK_PRINT("LockReleaseAll", lock, 0); - Assert(lock->nRequested >= 0); - Assert(lock->nGranted >= 0); - Assert(lock->nGranted <= lock->nRequested); - Assert((proclock->holdMask & ~lock->grantMask) == 0); - - /* - * Release the previously-marked lock modes - */ - for (i = 1; i <= numLockModes; i++) + while (proclock) { - if (proclock->releaseMask & LOCKBIT_ON(i)) - wakeupNeeded |= UnGrantLock(lock, i, proclock, - lockMethodTable); - } - Assert((lock->nRequested >= 0) && (lock->nGranted >= 0)); - Assert(lock->nGranted <= lock->nRequested); - LOCK_PRINT("LockReleaseAll: updated", lock, 0); + bool wakeupNeeded = false; + PROCLOCK *nextplock; - proclock->releaseMask = 0; + /* Get link first, since we may unlink/delete this proclock */ + nextplock = (PROCLOCK *) + SHMQueueNext(procLocks, &proclock->procLink, + offsetof(PROCLOCK, procLink)); - /* CleanUpLock will wake up waiters if needed. 
*/ - CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded); + Assert(proclock->tag.proc == MAKE_OFFSET(MyProc)); -next_item: - proclock = nextplock; - } + lock = (LOCK *) MAKE_PTR(proclock->tag.lock); - LWLockRelease(masterLock); + /* Ignore items that are not of the lockmethod to be removed */ + if (LOCK_LOCKMETHOD(*lock) != lockmethodid) + goto next_item; + + /* + * In allLocks mode, force release of all locks even if locallock + * table had problems + */ + if (allLocks) + proclock->releaseMask = proclock->holdMask; + else + Assert((proclock->releaseMask & ~proclock->holdMask) == 0); + + /* + * Ignore items that have nothing to be released, unless they have + * holdMask == 0 and are therefore recyclable + */ + if (proclock->releaseMask == 0 && proclock->holdMask != 0) + goto next_item; + + PROCLOCK_PRINT("LockReleaseAll", proclock); + LOCK_PRINT("LockReleaseAll", lock, 0); + Assert(lock->nRequested >= 0); + Assert(lock->nGranted >= 0); + Assert(lock->nGranted <= lock->nRequested); + Assert((proclock->holdMask & ~lock->grantMask) == 0); + + /* + * Release the previously-marked lock modes + */ + for (i = 1; i <= numLockModes; i++) + { + if (proclock->releaseMask & LOCKBIT_ON(i)) + wakeupNeeded |= UnGrantLock(lock, i, proclock, + lockMethodTable); + } + Assert((lock->nRequested >= 0) && (lock->nGranted >= 0)); + Assert(lock->nGranted <= lock->nRequested); + LOCK_PRINT("LockReleaseAll: updated", lock, 0); + + proclock->releaseMask = 0; + + /* CleanUpLock will wake up waiters if needed. */ + CleanUpLock(lock, proclock, + lockMethodTable, partition, + wakeupNeeded); + + next_item: + proclock = nextplock; + } /* loop over PROCLOCKs within this partition */ + + LWLockRelease(partitionLock); + } /* loop over partitions */ #ifdef LOCK_DEBUG if (*(lockMethodTable->trace_flag)) @@ -1627,19 +1706,16 @@ PostPrepare_Locks(TransactionId xid) { PGPROC *newproc = TwoPhaseGetDummyProc(xid); HASH_SEQ_STATUS status; - SHM_QUEUE *procLocks = &(MyProc->procLocks); - LWLockId masterLock; LOCALLOCK *locallock; + LOCK *lock; PROCLOCK *proclock; PROCLOCKTAG proclocktag; bool found; - LOCK *lock; + int partition; /* This is a critical section: any error means big trouble */ START_CRIT_SECTION(); - masterLock = LockMgrLock; - /* * First we run through the locallock table and get rid of unwanted * entries, then we scan the process's proclocks and transfer them to the @@ -1678,105 +1754,121 @@ PostPrepare_Locks(TransactionId xid) RemoveLocalLock(locallock); } - LWLockAcquire(masterLock, LW_EXCLUSIVE); - - proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, - offsetof(PROCLOCK, procLink)); - - while (proclock) + /* + * Now, scan each lock partition separately. 
+ */ + for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++) { - PROCLOCK *nextplock; - LOCKMASK holdMask; - PROCLOCK *newproclock; + LWLockId partitionLock = FirstLockMgrLock + partition; + SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]); - /* Get link first, since we may unlink/delete this proclock */ - nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink, - offsetof(PROCLOCK, procLink)); + proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, + offsetof(PROCLOCK, procLink)); - Assert(proclock->tag.proc == MAKE_OFFSET(MyProc)); + if (!proclock) + continue; /* needn't examine this partition */ - lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); - /* Ignore nontransactional locks */ - if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional) - goto next_item; - - PROCLOCK_PRINT("PostPrepare_Locks", proclock); - LOCK_PRINT("PostPrepare_Locks", lock, 0); - Assert(lock->nRequested >= 0); - Assert(lock->nGranted >= 0); - Assert(lock->nGranted <= lock->nRequested); - Assert((proclock->holdMask & ~lock->grantMask) == 0); - - /* - * Since there were no session locks, we should be releasing all locks - */ - if (proclock->releaseMask != proclock->holdMask) - elog(PANIC, "we seem to have dropped a bit somewhere"); - - holdMask = proclock->holdMask; - - /* - * We cannot simply modify proclock->tag.proc to reassign ownership of - * the lock, because that's part of the hash key and the proclock - * would then be in the wrong hash chain. So, unlink and delete the - * old proclock; create a new one with the right contents; and link it - * into place. We do it in this order to be certain we won't run out - * of shared memory (the way dynahash.c works, the deleted object is - * certain to be available for reallocation). - */ - SHMQueueDelete(&proclock->lockLink); - SHMQueueDelete(&proclock->procLink); - if (!hash_search(LockMethodProcLockHash, - (void *) &(proclock->tag), - HASH_REMOVE, NULL)) - elog(PANIC, "proclock table corrupted"); - - /* - * Create the hash key for the new proclock table. - */ - MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); - proclocktag.lock = MAKE_OFFSET(lock); - proclocktag.proc = MAKE_OFFSET(newproc); - - newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, - (void *) &proclocktag, - HASH_ENTER_NULL, &found); - if (!newproclock) - ereport(PANIC, /* should not happen */ - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of shared memory"), - errdetail("Not enough memory for reassigning the prepared transaction's locks."))); - - /* - * If new, initialize the new entry - */ - if (!found) + while (proclock) { - newproclock->holdMask = 0; - newproclock->releaseMask = 0; - /* Add new proclock to appropriate lists */ - SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink); - SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink); - PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock); - } - else - { - PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock); - Assert((newproclock->holdMask & ~lock->grantMask) == 0); - } + PROCLOCK *nextplock; + LOCKMASK holdMask; + PROCLOCK *newproclock; - /* - * Pass over the identified lock ownership. 
- */ - Assert((newproclock->holdMask & holdMask) == 0); - newproclock->holdMask |= holdMask; + /* Get link first, since we may unlink/delete this proclock */ + nextplock = (PROCLOCK *) + SHMQueueNext(procLocks, &proclock->procLink, + offsetof(PROCLOCK, procLink)); -next_item: - proclock = nextplock; - } + Assert(proclock->tag.proc == MAKE_OFFSET(MyProc)); - LWLockRelease(masterLock); + lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + + /* Ignore nontransactional locks */ + if (!LockMethods[LOCK_LOCKMETHOD(*lock)]->transactional) + goto next_item; + + PROCLOCK_PRINT("PostPrepare_Locks", proclock); + LOCK_PRINT("PostPrepare_Locks", lock, 0); + Assert(lock->nRequested >= 0); + Assert(lock->nGranted >= 0); + Assert(lock->nGranted <= lock->nRequested); + Assert((proclock->holdMask & ~lock->grantMask) == 0); + + /* + * Since there were no session locks, we should be releasing all + * locks + */ + if (proclock->releaseMask != proclock->holdMask) + elog(PANIC, "we seem to have dropped a bit somewhere"); + + holdMask = proclock->holdMask; + + /* + * We cannot simply modify proclock->tag.proc to reassign + * ownership of the lock, because that's part of the hash key and + * the proclock would then be in the wrong hash chain. So, unlink + * and delete the old proclock; create a new one with the right + * contents; and link it into place. We do it in this order to be + * certain we won't run out of shared memory (the way dynahash.c + * works, the deleted object is certain to be available for + * reallocation). + */ + SHMQueueDelete(&proclock->lockLink); + SHMQueueDelete(&proclock->procLink); + if (!hash_search(LockMethodProcLockHash[partition], + (void *) &(proclock->tag), + HASH_REMOVE, NULL)) + elog(PANIC, "proclock table corrupted"); + + /* + * Create the hash key for the new proclock table. + */ + MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); + proclocktag.lock = MAKE_OFFSET(lock); + proclocktag.proc = MAKE_OFFSET(newproc); + + newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition], + (void *) &proclocktag, + HASH_ENTER_NULL, &found); + if (!newproclock) + ereport(PANIC, /* should not happen */ + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errdetail("Not enough memory for reassigning the prepared transaction's locks."))); + + /* + * If new, initialize the new entry + */ + if (!found) + { + newproclock->holdMask = 0; + newproclock->releaseMask = 0; + /* Add new proclock to appropriate lists */ + SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink); + SHMQueueInsertBefore(&(newproc->myProcLocks[partition]), + &newproclock->procLink); + PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock); + } + else + { + PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock); + Assert((newproclock->holdMask & ~lock->grantMask) == 0); + } + + /* + * Pass over the identified lock ownership. 
+ */ + Assert((newproclock->holdMask & holdMask) == 0); + newproclock->holdMask |= holdMask; + + next_item: + proclock = nextplock; + } /* loop over PROCLOCKs within this partition */ + + LWLockRelease(partitionLock); + } /* loop over partitions */ END_CRIT_SECTION(); } @@ -1789,20 +1881,23 @@ Size LockShmemSize(void) { Size size = 0; - long max_table_size = NLOCKENTS(); + Size tabsize; + long max_table_size; - /* lockHash table */ - size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK))); + /* lock hash tables */ + max_table_size = NLOCKENTS(); + max_table_size = (max_table_size - 1) / NUM_LOCK_PARTITIONS + 1; + tabsize = hash_estimate_size(max_table_size, sizeof(LOCK)); + size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS)); - /* proclockHash table */ - size = add_size(size, hash_estimate_size(max_table_size, sizeof(PROCLOCK))); + /* proclock hash tables */ + max_table_size *= 2; + tabsize = hash_estimate_size(max_table_size, sizeof(PROCLOCK)); + size = add_size(size, mul_size(tabsize, NUM_LOCK_PARTITIONS)); /* - * Note we count only one pair of hash tables, since the userlocks table - * actually overlays the main one. - * - * Since the lockHash entry count above is only an estimate, add 10% - * safety margin. + * Since there is likely to be some space wastage due to uneven use + * of the partitions, add 10% safety margin. */ size = add_size(size, size / 10); @@ -1818,9 +1913,9 @@ LockShmemSize(void) * copies of the same PGPROC and/or LOCK objects are likely to appear. * It is the caller's responsibility to match up duplicates if wanted. * - * The design goal is to hold the LockMgrLock for as short a time as possible; + * The design goal is to hold the LWLocks for as short a time as possible; * thus, this function simply makes a copy of the necessary data and releases - * the lock, allowing the caller to contemplate and format the data for as + * the locks, allowing the caller to contemplate and format the data for as * long as it pleases. */ LockData * @@ -1830,40 +1925,67 @@ GetLockStatusData(void) HTAB *proclockTable; PROCLOCK *proclock; HASH_SEQ_STATUS seqstat; + int els; + int el; int i; data = (LockData *) palloc(sizeof(LockData)); - LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); - - proclockTable = LockMethodProcLockHash; - - data->nelements = i = proclockTable->hctl->nentries; - - data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * i); - data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * i); - data->procs = (PGPROC *) palloc(sizeof(PGPROC) * i); - data->locks = (LOCK *) palloc(sizeof(LOCK) * i); - - hash_seq_init(&seqstat, proclockTable); - - i = 0; - while ((proclock = hash_seq_search(&seqstat))) + /* + * Acquire lock on the entire shared lock data structures. We can't + * operate one partition at a time if we want to deliver a self-consistent + * view of the state. + * + * Since this is a read-only operation, we take shared instead of exclusive + * lock. There's not a whole lot of point to this, because all the normal + * operations require exclusive lock, but it doesn't hurt anything either. + * It will at least allow two backends to do GetLockStatusData in parallel. + * + * Must grab LWLocks in partition-number order to avoid LWLock deadlock. + * + * Use same loop to count up the total number of PROCLOCK objects. 
+ */ + els = 0; + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) { - PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc); - LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock); - - data->proclockaddrs[i] = MAKE_OFFSET(proclock); - memcpy(&(data->proclocks[i]), proclock, sizeof(PROCLOCK)); - memcpy(&(data->procs[i]), proc, sizeof(PGPROC)); - memcpy(&(data->locks[i]), lock, sizeof(LOCK)); - - i++; + LWLockAcquire(FirstLockMgrLock + i, LW_SHARED); + proclockTable = LockMethodProcLockHash[i]; + els += proclockTable->hctl->nentries; } - LWLockRelease(LockMgrLock); + data->nelements = els; + data->proclockaddrs = (SHMEM_OFFSET *) palloc(sizeof(SHMEM_OFFSET) * els); + data->proclocks = (PROCLOCK *) palloc(sizeof(PROCLOCK) * els); + data->procs = (PGPROC *) palloc(sizeof(PGPROC) * els); + data->locks = (LOCK *) palloc(sizeof(LOCK) * els); - Assert(i == data->nelements); + el = 0; + + /* Now scan the tables to copy the data */ + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + { + proclockTable = LockMethodProcLockHash[i]; + hash_seq_init(&seqstat, proclockTable); + + while ((proclock = hash_seq_search(&seqstat))) + { + PGPROC *proc = (PGPROC *) MAKE_PTR(proclock->tag.proc); + LOCK *lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + + data->proclockaddrs[el] = MAKE_OFFSET(proclock); + memcpy(&(data->proclocks[el]), proclock, sizeof(PROCLOCK)); + memcpy(&(data->procs[el]), proc, sizeof(PGPROC)); + memcpy(&(data->locks[el]), lock, sizeof(LOCK)); + + el++; + } + } + + /* And release locks */ + for (i = NUM_LOCK_PARTITIONS; --i >= 0; ) + LWLockRelease(FirstLockMgrLock + i); + + Assert(el == data->nelements); return data; } @@ -1879,7 +2001,7 @@ GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode) #ifdef LOCK_DEBUG /* - * Dump all locks in the given proc's procLocks list. + * Dump all locks in the given proc's myProcLocks lists. * * Caller is responsible for having acquired appropriate LWLocks. 
*/ @@ -1889,29 +2011,34 @@ DumpLocks(PGPROC *proc) SHM_QUEUE *procLocks; PROCLOCK *proclock; LOCK *lock; + int i; if (proc == NULL) return; - procLocks = &proc->procLocks; - if (proc->waitLock) LOCK_PRINT("DumpLocks: waiting on", proc->waitLock, 0); - proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, - offsetof(PROCLOCK, procLink)); - - while (proclock) + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) { - Assert(proclock->tag.proc == MAKE_OFFSET(proc)); + procLocks = &(proc->myProcLocks[i]); - lock = (LOCK *) MAKE_PTR(proclock->tag.lock); - - PROCLOCK_PRINT("DumpLocks", proclock); - LOCK_PRINT("DumpLocks", lock, 0); - - proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink, + proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, procLink)); + + while (proclock) + { + Assert(proclock->tag.proc == MAKE_OFFSET(proc)); + + lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + + PROCLOCK_PRINT("DumpLocks", proclock); + LOCK_PRINT("DumpLocks", lock, 0); + + proclock = (PROCLOCK *) + SHMQueueNext(procLocks, &proclock->procLink, + offsetof(PROCLOCK, procLink)); + } } } @@ -1928,25 +2055,30 @@ DumpAllLocks(void) LOCK *lock; HTAB *proclockTable; HASH_SEQ_STATUS status; + int i; proc = MyProc; - proclockTable = LockMethodProcLockHash; if (proc && proc->waitLock) LOCK_PRINT("DumpAllLocks: waiting on", proc->waitLock, 0); - hash_seq_init(&status, proclockTable); - while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL) + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) { - PROCLOCK_PRINT("DumpAllLocks", proclock); + proclockTable = LockMethodProcLockHash[i]; + hash_seq_init(&status, proclockTable); - if (proclock->tag.lock) + while ((proclock = (PROCLOCK *) hash_seq_search(&status)) != NULL) { - lock = (LOCK *) MAKE_PTR(proclock->tag.lock); - LOCK_PRINT("DumpAllLocks", lock, 0); + PROCLOCK_PRINT("DumpAllLocks", proclock); + + if (proclock->tag.lock) + { + lock = (LOCK *) MAKE_PTR(proclock->tag.lock); + LOCK_PRINT("DumpAllLocks", lock, 0); + } + else + elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL"); } - else - elog(LOG, "DumpAllLocks: proclock->tag.lock = NULL"); } } #endif /* LOCK_DEBUG */ @@ -1975,7 +2107,8 @@ lock_twophase_recover(TransactionId xid, uint16 info, PROCLOCK *proclock; PROCLOCKTAG proclocktag; bool found; - LWLockId masterLock; + int partition; + LWLockId partitionLock; LockMethod lockMethodTable; Assert(len == sizeof(TwoPhaseLockRecord)); @@ -1987,19 +2120,20 @@ lock_twophase_recover(TransactionId xid, uint16 info, elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; - masterLock = LockMgrLock; + partition = LockTagToPartition(locktag); + partitionLock = FirstLockMgrLock + partition; - LWLockAcquire(masterLock, LW_EXCLUSIVE); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * Find or create a lock with this tag. 
*/ - lock = (LOCK *) hash_search(LockMethodLockHash, + lock = (LOCK *) hash_search(LockMethodLockHash[partition], (void *) locktag, HASH_ENTER_NULL, &found); if (!lock) { - LWLockRelease(masterLock); + LWLockRelease(partitionLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"), @@ -2039,7 +2173,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, /* * Find or create a proclock entry with this tag */ - proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, + proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition], (void *) &proclocktag, HASH_ENTER_NULL, &found); if (!proclock) @@ -2054,12 +2188,12 @@ lock_twophase_recover(TransactionId xid, uint16 info, * anyone to release the lock object later. */ Assert(SHMQueueEmpty(&(lock->procLocks))); - if (!hash_search(LockMethodLockHash, + if (!hash_search(LockMethodLockHash[partition], (void *) &(lock->tag), HASH_REMOVE, NULL)) elog(PANIC, "lock table corrupted"); } - LWLockRelease(masterLock); + LWLockRelease(partitionLock); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of shared memory"), @@ -2075,7 +2209,8 @@ lock_twophase_recover(TransactionId xid, uint16 info, proclock->releaseMask = 0; /* Add proclock to appropriate lists */ SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink); - SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink); + SHMQueueInsertBefore(&(proc->myProcLocks[partition]), + &proclock->procLink); PROCLOCK_PRINT("lock_twophase_recover: new", proclock); } else @@ -2106,7 +2241,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, */ GrantLock(lock, proclock, lockmode); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); } /* @@ -2123,10 +2258,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, LOCKTAG *locktag; LOCKMODE lockmode; LOCKMETHODID lockmethodid; - PROCLOCKTAG proclocktag; LOCK *lock; PROCLOCK *proclock; - LWLockId masterLock; + PROCLOCKTAG proclocktag; + int partition; + LWLockId partitionLock; LockMethod lockMethodTable; bool wakeupNeeded; @@ -2139,14 +2275,15 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, elog(ERROR, "unrecognized lock method: %d", lockmethodid); lockMethodTable = LockMethods[lockmethodid]; - masterLock = LockMgrLock; + partition = LockTagToPartition(locktag); + partitionLock = FirstLockMgrLock + partition; - LWLockAcquire(masterLock, LW_EXCLUSIVE); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * Re-find the lock object (it had better be there). 
*/ - lock = (LOCK *) hash_search(LockMethodLockHash, + lock = (LOCK *) hash_search(LockMethodLockHash[partition], (void *) locktag, HASH_FIND, NULL); if (!lock) @@ -2158,7 +2295,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */ proclocktag.lock = MAKE_OFFSET(lock); proclocktag.proc = MAKE_OFFSET(proc); - proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash, + proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[partition], (void *) &proclocktag, HASH_FIND, NULL); if (!proclock) @@ -2171,7 +2308,7 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, if (!(proclock->holdMask & LOCKBIT_ON(lockmode))) { PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); elog(WARNING, "you don't own a lock of type %s", lockMethodTable->lockModeNames[lockmode]); return; @@ -2182,9 +2319,11 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, */ wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable); - CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded); + CleanUpLock(lock, proclock, + lockMethodTable, partition, + wakeupNeeded); - LWLockRelease(masterLock); + LWLockRelease(partitionLock); } /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index a215a65285..e1edabde90 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -8,14 +8,14 @@ * exclusive and shared lock modes (to support read/write and read-only * access to a shared object). There are few other frammishes. User-level * locking should be done with the full lock manager --- which depends on - * an LWLock to protect its shared state. + * LWLocks to protect its shared state. * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.35 2005/12/06 23:08:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/lwlock.c,v 1.36 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -125,7 +125,10 @@ NumLWLocks(void) */ /* Predefined LWLocks */ - numLocks = (int) NumFixedLWLocks; + numLocks = (int) FirstLockMgrLock; + + /* lock.c gets the ones starting at FirstLockMgrLock */ + numLocks += NUM_LOCK_PARTITIONS; /* bufmgr.c needs two for each shared buffer */ numLocks += 2 * NBuffers; @@ -204,10 +207,11 @@ CreateLWLocks(void) /* * Initialize the dynamic-allocation counter, which is stored just before - * the first LWLock. + * the first LWLock. The LWLocks used by lock.c are not dynamically + * allocated, it just assumes it has them. 
*/ LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int)); - LWLockCounter[0] = (int) NumFixedLWLocks; + LWLockCounter[0] = (int) FirstLockMgrLock + NUM_LOCK_PARTITIONS; LWLockCounter[1] = numLocks; } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 8d8269041e..34d80bfcee 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.169 2005/12/09 01:22:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.170 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,9 +18,8 @@ * ProcQueueAlloc() -- create a shm queue for sleeping processes * ProcQueueInit() -- create a queue without allocing memory * - * Locking and waiting for buffers can cause the backend to be - * put to sleep. Whoever releases the lock, etc. wakes the - * process up again (and gives it an error code so it knows + * Waiting for a lock causes the backend to be put to sleep. Whoever releases + * the lock wakes the process up again (and gives it an error code so it knows * whether it was awoken on an error condition). * * Interface (b): @@ -28,7 +27,7 @@ * ProcReleaseLocks -- frees the locks associated with current transaction * * ProcKill -- destroys the shared memory state (and locks) - * associated with the process. + * associated with the process. */ #include "postgres.h" @@ -65,7 +64,8 @@ NON_EXEC_STATIC slock_t *ProcStructLock = NULL; static PROC_HDR *ProcGlobal = NULL; static PGPROC *DummyProcs = NULL; -static bool waitingForLock = false; +/* If we are waiting for a lock, this points to the associated LOCALLOCK */ +static LOCALLOCK *lockAwaited = NULL; /* Mark these volatile because they can be changed by signal handler */ static volatile bool statement_timeout_active = false; @@ -200,10 +200,10 @@ InitProcGlobal(void) void InitProcess(void) { - SHMEM_OFFSET myOffset; - /* use volatile pointer to prevent code rearrangement */ volatile PROC_HDR *procglobal = ProcGlobal; + SHMEM_OFFSET myOffset; + int i; /* * ProcGlobal should be set by a previous call to InitProcGlobal (if we @@ -264,7 +264,8 @@ InitProcess(void) MyProc->lwWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; - SHMQueueInit(&(MyProc->procLocks)); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + SHMQueueInit(&(MyProc->myProcLocks[i])); /* * Add our PGPROC to the PGPROC array in shared memory. @@ -304,6 +305,7 @@ void InitDummyProcess(int proctype) { PGPROC *dummyproc; + int i; /* * ProcGlobal should be set by a previous call to InitProcGlobal (we @@ -360,7 +362,8 @@ InitDummyProcess(int proctype) MyProc->lwWaitLink = NULL; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; - SHMQueueInit(&(MyProc->procLocks)); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + SHMQueueInit(&(MyProc->myProcLocks[i])); /* * Arrange to clean up at process exit. @@ -416,21 +419,24 @@ HaveNFreeProcs(int n) bool LockWaitCancel(void) { + LWLockId partitionLock; + /* Nothing to do if we weren't waiting for a lock */ - if (!waitingForLock) + if (lockAwaited == NULL) return false; /* Turn off the deadlock timer, if it's still running (see ProcSleep) */ disable_sig_alarm(false); /* Unlink myself from the wait queue, if on it (might not be anymore!) 
*/ - LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); + partitionLock = FirstLockMgrLock + lockAwaited->partition; + LWLockAcquire(partitionLock, LW_EXCLUSIVE); if (MyProc->links.next != INVALID_OFFSET) { /* We could not have been granted the lock yet */ Assert(MyProc->waitStatus == STATUS_ERROR); - RemoveFromWaitQueue(MyProc); + RemoveFromWaitQueue(MyProc, lockAwaited->partition); } else { @@ -444,9 +450,9 @@ LockWaitCancel(void) GrantAwaitedLock(); } - waitingForLock = false; + lockAwaited = NULL; - LWLockRelease(LockMgrLock); + LWLockRelease(partitionLock); /* * Reset the proc wait semaphore to zero. This is necessary in the @@ -606,18 +612,18 @@ ProcQueueInit(PROC_QUEUE *queue) /* - * ProcSleep -- put a process to sleep + * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * - * Locktable's masterLock must be held at entry, and will be held + * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after - * we release the masterLock. + * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. * @@ -625,12 +631,13 @@ ProcQueueInit(PROC_QUEUE *queue) * semaphore is normally zero, so when we try to acquire it, we sleep. */ int -ProcSleep(LockMethod lockMethodTable, - LOCKMODE lockmode, - LOCK *lock, - PROCLOCK *proclock) +ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { - LWLockId masterLock = LockMgrLock; + LOCKMODE lockmode = locallock->tag.mode; + LOCK *lock = locallock->lock; + PROCLOCK *proclock = locallock->proclock; + int partition = locallock->partition; + LWLockId partitionLock = FirstLockMgrLock + partition; PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; @@ -732,22 +739,22 @@ ProcSleep(LockMethod lockMethodTable, */ if (early_deadlock) { - RemoveFromWaitQueue(MyProc); + RemoveFromWaitQueue(MyProc, partition); return STATUS_ERROR; } /* mark that we are waiting for a lock */ - waitingForLock = true; + lockAwaited = locallock; /* - * Release the locktable's masterLock. + * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockWaitCancel will clean up if cancel/die happens. */ - LWLockRelease(masterLock); + LWLockRelease(partitionLock); /* * Set timer so we can wake up after awhile and check for a deadlock. If a @@ -785,16 +792,16 @@ ProcSleep(LockMethod lockMethodTable, elog(FATAL, "could not disable timer for process wakeup"); /* - * Re-acquire the locktable's masterLock. We have to do this to hold off - * cancel/die interrupts before we can mess with waitingForLock (else we - * might have a missed or duplicated locallock update). + * Re-acquire the lock table's partition lock. We have to do this to + * hold off cancel/die interrupts before we can mess with lockAwaited + * (else we might have a missed or duplicated locallock update). */ - LWLockAcquire(masterLock, LW_EXCLUSIVE); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We no longer want LockWaitCancel to do anything. 
*/ - waitingForLock = false; + lockAwaited = NULL; /* * If we got the lock, be sure to remember it in the locallock table. @@ -816,6 +823,8 @@ ProcSleep(LockMethod lockMethodTable, * Also remove the process from the wait queue and set its links invalid. * RETURN: the next process in the wait queue. * + * The appropriate lock partition lock must be held by caller. + * * XXX: presently, this code is only used for the "success" case, and only * works correctly for that case. To clean up in failure case, would need * to twiddle the lock's request counts too --- see RemoveFromWaitQueue. @@ -825,8 +834,6 @@ ProcWakeup(PGPROC *proc, int waitStatus) { PGPROC *retProc; - /* assume that masterLock has been acquired */ - /* Proc should be sleeping ... */ if (proc->links.prev == INVALID_OFFSET || proc->links.next == INVALID_OFFSET) @@ -854,6 +861,8 @@ ProcWakeup(PGPROC *proc, int waitStatus) * ProcLockWakeup -- routine for waking up processes when a lock is * released (or a prior waiter is aborted). Scan all waiters * for lock, waken any that are no longer blocked. + * + * The appropriate lock partition lock must be held by caller. */ void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock) @@ -908,25 +917,32 @@ ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock) Assert(waitQueue->size >= 0); } -/* -------------------- +/* + * CheckDeadLock + * * We only get to this routine if we got SIGALRM after DeadlockTimeout * while waiting for a lock to be released by some other process. Look * to see if there's a deadlock; if not, just return and continue waiting. * If we have a real deadlock, remove ourselves from the lock's wait queue * and signal an error to ProcSleep. - * -------------------- */ static void CheckDeadLock(void) { + int i; + /* - * Acquire locktable lock. Note that the deadlock check interrupt had - * better not be enabled anywhere that this process itself holds the - * locktable lock, else this will wait forever. Also note that - * LWLockAcquire creates a critical section, so that this routine cannot - * be interrupted by cancel/die interrupts. + * Acquire exclusive lock on the entire shared lock data structures. + * Must grab LWLocks in partition-number order to avoid LWLock deadlock. + * + * Note that the deadlock check interrupt had better not be enabled + * anywhere that this process itself holds lock partition locks, else this + * will wait forever. Also note that LWLockAcquire creates a critical + * section, so that this routine cannot be interrupted by cancel/die + * interrupts. */ - LWLockAcquire(LockMgrLock, LW_EXCLUSIVE); + for (i = 0; i < NUM_LOCK_PARTITIONS; i++) + LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE); /* * Check to see if we've been awoken by anyone in the interim. @@ -937,14 +953,11 @@ CheckDeadLock(void) * * We check by looking to see if we've been unlinked from the wait queue. * This is quicker than checking our semaphore's state, since no kernel - * call is needed, and it is safe because we hold the locktable lock. + * call is needed, and it is safe because we hold the lock partition lock. */ if (MyProc->links.prev == INVALID_OFFSET || MyProc->links.next == INVALID_OFFSET) - { - LWLockRelease(LockMgrLock); - return; - } + goto check_done; #ifdef LOCK_DEBUG if (Debug_deadlocks) @@ -954,16 +967,19 @@ CheckDeadLock(void) if (!DeadLockCheck(MyProc)) { /* No deadlock, so keep waiting */ - LWLockRelease(LockMgrLock); - return; + goto check_done; } /* * Oops. We have a deadlock. * - * Get this process out of wait state. + * Get this process out of wait state. 
(Note: we could do this more + * efficiently by relying on lockAwaited, but use this coding to preserve + * the flexibility to kill some other transaction than the one detecting + * the deadlock.) */ - RemoveFromWaitQueue(MyProc); + Assert(MyProc->waitLock != NULL); + RemoveFromWaitQueue(MyProc, LockTagToPartition(&(MyProc->waitLock->tag))); /* * Set MyProc->waitStatus to STATUS_ERROR so that ProcSleep will report an @@ -987,7 +1003,15 @@ CheckDeadLock(void) * them anymore. However, RemoveFromWaitQueue took care of waking up any * such processes. */ - LWLockRelease(LockMgrLock); + + /* + * Release locks acquired at head of routine. Order is not critical, + * so do it back-to-front to avoid waking another CheckDeadLock instance + * before it can get all the locks. + */ +check_done: + for (i = NUM_LOCK_PARTITIONS; --i >= 0; ) + LWLockRelease(FirstLockMgrLock + i); } diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index e289632054..9af03fb474 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.92 2005/12/09 01:22:04 tgl Exp $ + * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.93 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,13 @@ #include "storage/shmem.h" +/* + * Number of partitions the shared lock tables are divided into. + * + * See LockTagToPartition() if you change this. + */ +#define NUM_LOCK_PARTITIONS 16 + /* originally in procq.h */ typedef struct PROC_QUEUE { @@ -348,6 +355,7 @@ typedef struct LOCALLOCK LOCK *lock; /* associated LOCK object in shared mem */ PROCLOCK *proclock; /* associated PROCLOCK object in shmem */ bool isTempObject; /* true if lock is on a temporary object */ + int partition; /* ID of partition containing this lock */ int nLocks; /* total number of times lock is held */ int numLockOwners; /* # of relevant ResourceOwners */ int maxLockOwners; /* allocated size of array */ @@ -389,6 +397,7 @@ typedef enum */ extern void InitLocks(void); extern LockMethod GetLocksMethodTable(const LOCK *lock); +extern int LockTagToPartition(const LOCKTAG *locktag); extern LockAcquireResult LockAcquire(const LOCKTAG *locktag, bool isTempObject, LOCKMODE lockmode, @@ -406,7 +415,7 @@ extern int LockCheckConflicts(LockMethod lockMethodTable, LOCK *lock, PROCLOCK *proclock, PGPROC *proc); extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode); extern void GrantAwaitedLock(void); -extern void RemoveFromWaitQueue(PGPROC *proc); +extern void RemoveFromWaitQueue(PGPROC *proc, int partition); extern Size LockShmemSize(void); extern bool DeadLockCheck(PGPROC *proc); extern void DeadLockReport(void); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 4291e0b2e7..c318e60b57 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.23 2005/10/15 02:49:46 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.24 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -16,9 +16,9 @@ /* * We have a number of predefined 
LWLocks, plus a bunch of LWLocks that are - * dynamically assigned (for shared buffers). The LWLock structures live - * in shared memory (since they contain shared data) and are identified by - * values of this enumerated type. We abuse the notion of an enum somewhat + * dynamically assigned (e.g., for shared buffers). The LWLock structures + * live in shared memory (since they contain shared data) and are identified + * by values of this enumerated type. We abuse the notion of an enum somewhat * by allowing values not listed in the enum declaration to be assigned. * The extra value MaxDynamicLWLock is there to keep the compiler from * deciding that the enum can be represented as char or short ... @@ -27,7 +27,6 @@ typedef enum LWLockId { BufMappingLock, BufFreelistLock, - LockMgrLock, OidGenLock, XidGenLock, ProcArrayLock, @@ -46,8 +45,7 @@ typedef enum LWLockId RelCacheInitLock, BgWriterCommLock, TwoPhaseStateLock, - - NumFixedLWLocks, /* must be last except for MaxDynamicLWLock */ + FirstLockMgrLock, /* must be last except for MaxDynamicLWLock */ MaxDynamicLWLock = 1000000000 } LWLockId; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 4cba391048..2cfee41eff 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.84 2005/10/15 02:49:46 momjian Exp $ + * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.85 2005/12/11 21:02:18 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -52,7 +52,8 @@ struct XidCache * so that the prepared transactions appear to be still running and are * correctly shown as holding locks. A prepared transaction PGPROC can be * distinguished from a real one at need by the fact that it has pid == 0. - * The semaphore and lock-related fields in a prepared-xact PGPROC are unused. + * The semaphore and lock-activity fields in a prepared-xact PGPROC are unused, + * but its myProcLocks[] lists are valid. */ struct PGPROC { @@ -86,8 +87,12 @@ struct PGPROC LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ - SHM_QUEUE procLocks; /* list of PROCLOCK objects for locks held or - * awaited by this backend */ + /* + * All PROCLOCK objects for locks held or awaited by this backend are + * linked into one of these lists, according to the partition number of + * their lock. + */ + SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS]; struct XidCache subxids; /* cache for subtransaction XIDs */ }; @@ -99,7 +104,7 @@ extern DLLIMPORT PGPROC *MyProc; /* - * There is one ProcGlobal struct for the whole installation. + * There is one ProcGlobal struct for the whole database cluster. */ typedef struct PROC_HDR { @@ -134,8 +139,7 @@ extern bool HaveNFreeProcs(int n); extern void ProcReleaseLocks(bool isCommit); extern void ProcQueueInit(PROC_QUEUE *queue); -extern int ProcSleep(LockMethod lockMethodTable, LOCKMODE lockmode, - LOCK *lock, PROCLOCK *proclock); +extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable); extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus); extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); extern bool LockWaitCancel(void);
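---------------------------------------------------------------------------

NOTES AND ILLUSTRATIVE SKETCHES

lock.h above declares LockTagToPartition(), but the lock.c hunk that defines
it is not part of this excerpt.  A minimal sketch of such a mapping, assuming
the whole LOCKTAG is hashed with hash_any() from access/hash.h (that hash
choice is an assumption here, not necessarily what the committed lock.c
does):

    /*
     * Sketch only: derive a partition number from a LOCKTAG by hashing the
     * whole tag.  Any deterministic function of the tag would do, so long
     * as every backend maps the same tag to the same partition.
     */
    int
    LockTagToPartition(const LOCKTAG *locktag)
    {
        uint32      hashcode;

        hashcode = DatumGetUInt32(hash_any((const unsigned char *) locktag,
                                           sizeof(LOCKTAG)));
        return (int) (hashcode % NUM_LOCK_PARTITIONS);
    }

The modulo keeps the result in 0..NUM_LOCK_PARTITIONS-1, which is why the
comment in lock.h says to look at this function before changing
NUM_LOCK_PARTITIONS.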
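Splitting the single procLocks list into myProcLocks[NUM_LOCK_PARTITIONS]
means that a whole-backend scan, such as the one LockReleaseAll() performs
in lock.c (also outside this excerpt), becomes a loop over partitions that
holds only one partition LWLock at a time.  A sketch of that access pattern
using the standard SHM_QUEUE accessors; WalkMyProcLocks is a hypothetical
name, not a function added by the patch:

    /*
     * Sketch: visit every PROCLOCK belonging to 'proc', one partition at
     * a time.  Only the partition currently being scanned is locked,
     * which is the point of the per-partition lists.
     */
    static void
    WalkMyProcLocks(PGPROC *proc)
    {
        int         i;

        for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
        {
            SHM_QUEUE  *procLocks = &(proc->myProcLocks[i]);
            PROCLOCK   *proclock;

            /* lock just this partition; the others remain available */
            LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);

            proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
                                                 offsetof(PROCLOCK, procLink));
            while (proclock)
            {
                /* ... examine proclock->holdMask, release locks, etc ... */
                proclock = (PROCLOCK *) SHMQueueNext(procLocks,
                                                     &proclock->procLink,
                                                     offsetof(PROCLOCK, procLink));
            }

            LWLockRelease(FirstLockMgrLock + i);
        }
    }

CheckDeadLock() is the deliberate exception to this one-partition-at-a-time
rule: the deadlock search needs a consistent view of every partition at
once, hence its acquire-in-partition-order, release-in-reverse loop over
all NUM_LOCK_PARTITIONS locks shown in the proc.c hunks above.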
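The revised ProcSleep() takes the caller's LOCALLOCK and recovers lockmode,
LOCK, PROCLOCK, and partition from it, so its caller, WaitOnLock() in
lock.c (whose matching hunk is likewise not in this excerpt), only needs to
hand over the LOCALLOCK it already holds.  Roughly, under that assumption:

    /* sketch of the caller side; the real WaitOnLock() does more cleanup */
    if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
    {
        /*
         * We failed as a result of a deadlock; CheckDeadLock() already
         * dequeued us.  DeadLockReport() raises the deadlock error.
         */
        DeadLockReport();
    }

Caching the partition number in LOCALLOCK (the new 'partition' field added
in lock.h above) is what lets ProcSleep(), LockWaitCancel(), and the lock
release paths find the right partition LWLock without rehashing the tag
each time.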