From 84e37126770dd6de903dad88ce150a49b63b5ef9 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Thu, 4 Aug 2011 12:38:33 -0400 Subject: [PATCH] Create VXID locks "lazily" in the main lock table. Instead of entering them on transaction startup, we materialize them only when someone wants to wait, which will occur only during CREATE INDEX CONCURRENTLY. In Hot Standby mode, the startup process must also be able to probe for conflicting VXID locks, but the lock need never be fully materialized, because the startup process does not use the normal lock wait mechanism. Since most VXID locks never need to touch the lock manager partition locks, this can significantly reduce blocking contention on read-heavy workloads. Patch by me. Review by Jeff Davis. --- src/backend/commands/indexcmds.c | 6 +- src/backend/postmaster/autovacuum.c | 2 +- src/backend/storage/ipc/sinvaladt.c | 21 +-- src/backend/storage/ipc/standby.c | 2 +- src/backend/storage/lmgr/README | 42 ++++-- src/backend/storage/lmgr/lmgr.c | 64 --------- src/backend/storage/lmgr/lock.c | 201 ++++++++++++++++++++++++++++ src/include/storage/lmgr.h | 5 - src/include/storage/lock.h | 4 + src/include/storage/proc.h | 2 + src/include/storage/sinvaladt.h | 3 +- 11 files changed, 257 insertions(+), 95 deletions(-) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index a1c299d00e..77648b4774 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -683,7 +683,7 @@ DefineIndex(RangeVar *heapRelation, while (VirtualTransactionIdIsValid(*old_lockholders)) { - VirtualXactLockTableWait(*old_lockholders); + VirtualXactLock(*old_lockholders, true); old_lockholders++; } @@ -769,7 +769,7 @@ DefineIndex(RangeVar *heapRelation, while (VirtualTransactionIdIsValid(*old_lockholders)) { - VirtualXactLockTableWait(*old_lockholders); + VirtualXactLock(*old_lockholders, true); old_lockholders++; } @@ -866,7 +866,7 @@ DefineIndex(RangeVar *heapRelation, } if (VirtualTransactionIdIsValid(old_snapshots[i])) - VirtualXactLockTableWait(old_snapshots[i]); + VirtualXactLock(old_snapshots[i], true); } /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 2f3fcbf040..f96c350546 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -1992,7 +1992,7 @@ do_autovacuum(void) backendID = GetTempNamespaceBackendId(classForm->relnamespace); /* We just ignore it if the owning backend is still active */ - if (backendID == MyBackendId || !BackendIdIsActive(backendID)) + if (backendID == MyBackendId || BackendIdGetProc(backendID) == NULL) { /* * We found an orphan temp table (which was probably left diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 47f1a61560..a1d8e26dd9 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -139,6 +139,7 @@ typedef struct ProcState { /* procPid is zero in an inactive ProcState array entry. */ pid_t procPid; /* PID of backend, for signaling */ + PGPROC *proc; /* PGPROC of backend */ /* nextMsgNum is meaningless if procPid == 0 or resetState is true. */ int nextMsgNum; /* next message number to read */ bool resetState; /* backend needs to reset its state */ @@ -246,6 +247,7 @@ CreateSharedInvalidationState(void) for (i = 0; i < shmInvalBuffer->maxBackends; i++) { shmInvalBuffer->procState[i].procPid = 0; /* inactive */ + shmInvalBuffer->procState[i].proc = NULL; shmInvalBuffer->procState[i].nextMsgNum = 0; /* meaningless */ shmInvalBuffer->procState[i].resetState = false; shmInvalBuffer->procState[i].signaled = false; @@ -313,6 +315,7 @@ SharedInvalBackendInit(bool sendOnly) /* mark myself active, with all extant messages already read */ stateP->procPid = MyProcPid; + stateP->proc = MyProc; stateP->nextMsgNum = segP->maxMsgNum; stateP->resetState = false; stateP->signaled = false; @@ -353,6 +356,7 @@ CleanupInvalidationState(int status, Datum arg) /* Mark myself inactive */ stateP->procPid = 0; + stateP->proc = NULL; stateP->nextMsgNum = 0; stateP->resetState = false; stateP->signaled = false; @@ -369,13 +373,16 @@ CleanupInvalidationState(int status, Datum arg) } /* - * BackendIdIsActive - * Test if the given backend ID is currently assigned to a process. + * BackendIdGetProc + * Get the PGPROC structure for a backend, given the backend ID. + * The result may be out of date arbitrarily quickly, so the caller + * must be careful about how this information is used. NULL is + * returned if the backend is not active. */ -bool -BackendIdIsActive(int backendID) +PGPROC * +BackendIdGetProc(int backendID) { - bool result; + PGPROC *result = NULL; SISeg *segP = shmInvalBuffer; /* Need to lock out additions/removals of backends */ @@ -385,10 +392,8 @@ BackendIdIsActive(int backendID) { ProcState *stateP = &segP->procState[backendID - 1]; - result = (stateP->procPid != 0); + result = stateP->proc; } - else - result = false; LWLockRelease(SInvalWriteLock); diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index bf92d25995..5673c27cbe 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -201,7 +201,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, standbyWait_us = STANDBY_INITIAL_WAIT_US; /* wait until the virtual xid is gone */ - while (!ConditionalVirtualXactLockTableWait(*waitlist)) + while (!VirtualXactLock(*waitlist, false)) { /* * Report via ps if we have been waiting for more than 500 msec diff --git a/src/backend/storage/lmgr/README b/src/backend/storage/lmgr/README index 436ba472e8..e3bb116344 100644 --- a/src/backend/storage/lmgr/README +++ b/src/backend/storage/lmgr/README @@ -263,16 +263,23 @@ Fast Path Locking ----------------- Fast path locking is a special purpose mechanism designed to reduce the -overhead of taking and releasing weak relation locks. SELECT, INSERT, -UPDATE, and DELETE must acquire a lock on every relation they operate on, -as well as various system catalogs that can be used internally. These locks -are notable not only for the very high frequency with which they are taken -and released, but also for the fact that they virtually never conflict. -Many DML operations can proceed in parallel against the same table at the -same time; only DDL operations such as CLUSTER, ALTER TABLE, or DROP -- or -explicit user action such as LOCK TABLE -- will create lock conflicts with -the "weak" locks (AccessShareLock, RowShareLock, RowExclusiveLock) acquired -by DML operations. +overhead of taking and releasing certain types of locks which are taken +and released very frequently but rarely conflict. Currently, this includes +two categories of locks: + +(1) Weak relation locks. SELECT, INSERT, UPDATE, and DELETE must acquire a +lock on every relation they operate on, as well as various system catalogs +that can be used internally. Many DML operations can proceed in parallel +against the same table at the same time; only DDL operations such as +CLUSTER, ALTER TABLE, or DROP -- or explicit user action such as LOCK TABLE +-- will create lock conflicts with the "weak" locks (AccessShareLock, +RowShareLock, RowExclusiveLock) acquired by DML operations. + +(2) VXID locks. Every transaction takes a lock on its own virtual +transaction ID. Currently, the only operations that wait for these locks +are CREATE INDEX CONCURRENTLY and Hot Standby (in the case of a conflict), +so most VXID locks are taken and released by the owner without anyone else +needing to care. The primary locking mechanism does not cope well with this workload. Even though the lock manager locks are partitioned, the locktag for any given @@ -284,8 +291,8 @@ even on 2-core servers, and becomes very pronounced as core count increases. To alleviate this bottleneck, beginning in PostgreSQL 9.2, each backend is permitted to record a limited number of locks on unshared relations in an array within its PGPROC structure, rather than using the primary lock table. -This is called the "fast path" mechanism, and can only be used when the -locker can verify that no conflicting locks can possibly exist. +This mechanism can only be used when the locker can verify that no conflicting +locks can possibly exist. A key point of this algorithm is that it must be possible to verify the absence of possibly conflicting locks without fighting over a shared LWLock or @@ -317,6 +324,17 @@ the strong locker has yet to acquire the per-backend LWLock we now hold (or, indeed, even the first per-backend LWLock) and will notice any weak lock we take when it does. +Fast-path VXID locks do not use the FastPathStrongLocks table. The first +lock taken on a VXID is always the ExclusiveLock taken by its owner. Any +subsequent lockers are share lockers waiting for the VXID to terminate. +Indeed, the only reason VXID locks use the lock manager at all (rather than +waiting for the VXID to terminate via some other method) is for deadlock +detection. Thus, the initial VXID lock can *always* be taken via the fast +path without checking for conflicts. Any subsequent locker must check +whether the lock has been transferred to the main lock table, and if not, +do so. The backend owning the VXID must be careful to clean up any entry +made in the main lock table at end of transaction. + The Deadlock Detection Algorithm -------------------------------- diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 3ac098b2a9..25ead29a48 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -515,70 +515,6 @@ ConditionalXactLockTableWait(TransactionId xid) return true; } - -/* - * VirtualXactLockTableInsert - * - * Insert a lock showing that the given virtual transaction ID is running --- - * this is done at main transaction start when its VXID is assigned. - * The lock can then be used to wait for the transaction to finish. - */ -void -VirtualXactLockTableInsert(VirtualTransactionId vxid) -{ - LOCKTAG tag; - - Assert(VirtualTransactionIdIsValid(vxid)); - - SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); - - (void) LockAcquire(&tag, ExclusiveLock, false, false); -} - -/* - * VirtualXactLockTableWait - * - * Waits until the lock on the given VXID is released, which shows that - * the top-level transaction owning the VXID has ended. - */ -void -VirtualXactLockTableWait(VirtualTransactionId vxid) -{ - LOCKTAG tag; - - Assert(VirtualTransactionIdIsValid(vxid)); - - SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); - - (void) LockAcquire(&tag, ShareLock, false, false); - - LockRelease(&tag, ShareLock, false); -} - -/* - * ConditionalVirtualXactLockTableWait - * - * As above, but only lock if we can get the lock without blocking. - * Returns TRUE if the lock was acquired. - */ -bool -ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid) -{ - LOCKTAG tag; - - Assert(VirtualTransactionIdIsValid(vxid)); - - SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); - - if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL) - return false; - - LockRelease(&tag, ShareLock, false); - - return true; -} - - /* * LockDatabaseObject * diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 416f5aae99..05efafc0b0 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -38,6 +38,7 @@ #include "miscadmin.h" #include "pg_trace.h" #include "pgstat.h" +#include "storage/sinvaladt.h" #include "storage/standby.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -160,6 +161,7 @@ static bool FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode); static bool FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag, uint32 hashcode); static PROCLOCK *FastPathGetRelationLockEntry(LOCALLOCK *locallock); +static void VirtualXactLockTableCleanup(void); /* * To make the fast-path lock mechanism work, we must have some way of @@ -1772,6 +1774,15 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks) elog(LOG, "LockReleaseAll: lockmethod=%d", lockmethodid); #endif + /* + * Get rid of our fast-path VXID lock, if appropriate. Note that this + * is the only way that the lock we hold on our own VXID can ever get + * released: it is always and only released when a toplevel transaction + * ends. + */ + if (lockmethodid == DEFAULT_LOCKMETHOD) + VirtualXactLockTableCleanup(); + numLockModes = lockMethodTable->numLockModes; /* @@ -3034,6 +3045,33 @@ GetLockStatusData(void) el++; } + if (proc->fpVXIDLock) + { + VirtualTransactionId vxid; + LockInstanceData *instance; + + if (el >= els) + { + els += MaxBackends; + data->locks = (LockInstanceData *) + repalloc(data->locks, sizeof(LockInstanceData) * els); + } + + vxid.backendId = proc->backendId; + vxid.localTransactionId = proc->fpLocalTransactionId; + + instance = &data->locks[el]; + SET_LOCKTAG_VIRTUALTRANSACTION(instance->locktag, vxid); + instance->holdMask = LOCKBIT_ON(ExclusiveLock); + instance->waitLockMode = NoLock; + instance->backend = proc->backendId; + instance->lxid = proc->lxid; + instance->pid = proc->pid; + instance->fastpath = true; + + el++; + } + LWLockRelease(proc->backendLock); } @@ -3529,3 +3567,166 @@ lock_twophase_postabort(TransactionId xid, uint16 info, { lock_twophase_postcommit(xid, info, recdata, len); } + +/* + * VirtualXactLockTableInsert + * + * Take vxid lock via the fast-path. There can't be any pre-existing + * lockers, as we haven't advertised this vxid via the ProcArray yet. + * + * Since MyProc->fpLocalTransactionId will normally contain the same data + * as MyProc->lxid, you might wonder if we really need both. The + * difference is that MyProc->lxid is set and cleared unlocked, and + * examined by procarray.c, while fpLocalTransactionId is protected by + * backendLock and is used only by the locking subsystem. Doing it this + * way makes it easier to verify that there are no funny race conditions. + * + * We don't bother recording this lock in the local lock table, since it's + * only ever released at the end of a transaction. Instead, + * LockReleaseAll() calls VirtualXactLockTableCleanup(). + */ +void +VirtualXactLockTableInsert(VirtualTransactionId vxid) +{ + Assert(VirtualTransactionIdIsValid(vxid)); + + LWLockAcquire(MyProc->backendLock, LW_EXCLUSIVE); + + Assert(MyProc->backendId == vxid.backendId); + Assert(MyProc->fpLocalTransactionId == InvalidLocalTransactionId); + Assert(MyProc->fpVXIDLock == false); + + MyProc->fpVXIDLock = true; + MyProc->fpLocalTransactionId = vxid.localTransactionId; + + LWLockRelease(MyProc->backendLock); +} + +/* + * VirtualXactLockTableCleanup + * + * Check whether a VXID lock has been materialized; if so, release it, + * unblocking waiters. + */ +static void +VirtualXactLockTableCleanup() +{ + bool fastpath; + LocalTransactionId lxid; + + Assert(MyProc->backendId != InvalidBackendId); + + /* + * Clean up shared memory state. + */ + LWLockAcquire(MyProc->backendLock, LW_EXCLUSIVE); + + fastpath = MyProc->fpVXIDLock; + lxid = MyProc->fpLocalTransactionId; + MyProc->fpVXIDLock = false; + MyProc->fpLocalTransactionId = InvalidLocalTransactionId; + + LWLockRelease(MyProc->backendLock); + + /* + * If fpVXIDLock has been cleared without touching fpLocalTransactionId, + * that means someone transferred the lock to the main lock table. + */ + if (!fastpath && LocalTransactionIdIsValid(lxid)) + { + VirtualTransactionId vxid; + LOCKTAG locktag; + + vxid.backendId = MyBackendId; + vxid.localTransactionId = lxid; + SET_LOCKTAG_VIRTUALTRANSACTION(locktag, vxid); + + LockRefindAndRelease(LockMethods[DEFAULT_LOCKMETHOD], MyProc, + &locktag, ExclusiveLock, false); + } +} + +/* + * VirtualXactLock + * + * If wait = true, wait until the given VXID has been released, and then + * return true. + * + * If wait = false, just check whether the VXID is still running, and return + * true or false. + */ +bool +VirtualXactLock(VirtualTransactionId vxid, bool wait) +{ + LOCKTAG tag; + PGPROC *proc; + + Assert(VirtualTransactionIdIsValid(vxid)); + + SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid); + + /* + * If a lock table entry must be made, this is the PGPROC on whose behalf + * it must be done. Note that the transaction might end or the PGPROC + * might be reassigned to a new backend before we get around to examining + * it, but it doesn't matter. If we find upon examination that the + * relevant lxid is no longer running here, that's enough to prove that + * it's no longer running anywhere. + */ + proc = BackendIdGetProc(vxid.backendId); + + /* + * We must acquire this lock before checking the backendId and lxid + * against the ones we're waiting for. The target backend will only + * set or clear lxid while holding this lock. + */ + LWLockAcquire(proc->backendLock, LW_EXCLUSIVE); + + /* If the transaction has ended, our work here is done. */ + if (proc->backendId != vxid.backendId + || proc->fpLocalTransactionId != vxid.localTransactionId) + { + LWLockRelease(proc->backendLock); + return true; + } + + /* + * If we aren't asked to wait, there's no need to set up a lock table + * entry. The transaction is still in progress, so just return false. + */ + if (!wait) + { + LWLockRelease(proc->backendLock); + return false; + } + + /* + * OK, we're going to need to sleep on the VXID. But first, we must set + * up the primary lock table entry, if needed. + */ + if (proc->fpVXIDLock) + { + PROCLOCK *proclock; + uint32 hashcode; + + hashcode = LockTagHashCode(&tag); + proclock = SetupLockInTable(LockMethods[DEFAULT_LOCKMETHOD], proc, + &tag, hashcode, ExclusiveLock); + if (!proclock) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of shared memory"), + errhint("You might need to increase max_locks_per_transaction."))); + GrantLock(proclock->tag.myLock, proclock, ExclusiveLock); + proc->fpVXIDLock = false; + } + + /* Done with proc->fpLockBits */ + LWLockRelease(proc->backendLock); + + /* Time to wait. */ + (void) LockAcquire(&tag, ShareLock, false, false); + + LockRelease(&tag, ShareLock, false); + return true; +} diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index bd44d92be3..340f6a3d30 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -56,11 +56,6 @@ extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid); extern bool ConditionalXactLockTableWait(TransactionId xid); -/* Lock a VXID (used to wait for a transaction to finish) */ -extern void VirtualXactLockTableInsert(VirtualTransactionId vxid); -extern void VirtualXactLockTableWait(VirtualTransactionId vxid); -extern bool ConditionalVirtualXactLockTableWait(VirtualTransactionId vxid); - /* Lock a general object (other than a relation) of the current database */ extern void LockDatabaseObject(Oid classid, Oid objid, uint16 objsubid, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 21b77f3059..e106ad5401 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -543,4 +543,8 @@ extern void DumpLocks(PGPROC *proc); extern void DumpAllLocks(void); #endif +/* Lock a VXID (used to wait for a transaction to finish) */ +extern void VirtualXactLockTableInsert(VirtualTransactionId vxid); +extern bool VirtualXactLock(VirtualTransactionId vxid, bool wait); + #endif /* LOCK_H */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 92b3b19f2d..a149a55855 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -152,6 +152,8 @@ struct PGPROC /* Lock manager data, recording fast-path locks taken by this backend. */ uint64 fpLockBits; /* lock modes held for each fast-path slot */ Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */ + bool fpVXIDLock; /* are we holding a fast-path VXID lock? */ + LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID lock */ }; /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */ diff --git a/src/include/storage/sinvaladt.h b/src/include/storage/sinvaladt.h index c70355847a..a61d696508 100644 --- a/src/include/storage/sinvaladt.h +++ b/src/include/storage/sinvaladt.h @@ -22,6 +22,7 @@ #ifndef SINVALADT_H #define SINVALADT_H +#include "storage/proc.h" #include "storage/sinval.h" /* @@ -30,7 +31,7 @@ extern Size SInvalShmemSize(void); extern void CreateSharedInvalidationState(void); extern void SharedInvalBackendInit(bool sendOnly); -extern bool BackendIdIsActive(int backendID); +extern PGPROC *BackendIdGetProc(int backendID); extern void SIInsertDataEntries(const SharedInvalidationMessage *data, int n); extern int SIGetDataEntries(SharedInvalidationMessage *data, int datasize);