From a4adc31f6902f6fc29d74868e8969412fc590da9 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 20 Nov 2022 11:56:32 -0800 Subject: [PATCH] lwlock: Fix quadratic behavior with very long wait lists Until now LWLockDequeueSelf() sequentially searched the list of waiters to see if the current proc is still is on the list of waiters, or has already been removed. In extreme workloads, where the wait lists are very long, this leads to a quadratic behavior. #backends iterating over a list #backends long. Additionally, the likelihood of needing to call LWLockDequeueSelf() in the first place also increases with the increased length of the wait queue, as it becomes more likely that a lock is released while waiting for the wait list lock, which is held for longer during lock release. Due to the exponential back-off in perform_spin_delay() this is surprisingly hard to detect. We should make that easier, e.g. by adding a wait event around the pg_usleep() - but that's a separate patch. The fix is simple - track whether a proc is currently waiting in the wait list or already removed but waiting to be woken up in PGPROC->lwWaiting. In some workloads with a lot of clients contending for a small number of lwlocks (e.g. WALWriteLock), the fix can substantially increase throughput. As the quadratic behavior arguably is a bug, we might want to decide to backpatch this fix in the future. Author: Andres Freund Reviewed-by: Bharath Rupireddy Discussion: https://postgr.es/m/20221027165914.2hofzp4cvutj6gin@awork3.anarazel.de Discussion: https://postgr.es/m/CALj2ACXktNbG=K8Xi7PSqbofTZozavhaxjatVc14iYaLu4Maag@mail.gmail.com --- src/backend/access/transam/twophase.c | 2 +- src/backend/storage/lmgr/lwlock.c | 53 +++++++++++++++------------ src/backend/storage/lmgr/proc.c | 4 +- src/include/storage/lwlock.h | 8 ++++ src/include/storage/proc.h | 2 +- 5 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 803d169f57..5017f4451e 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -485,7 +485,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->roleId = owner; proc->tempNamespaceId = InvalidOid; proc->isBackgroundWorker = false; - proc->lwWaiting = false; + proc->lwWaiting = LW_WS_NOT_WAITING; proc->lwWaitMode = 0; proc->waitLock = NULL; proc->waitProcLock = NULL; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 532cd67f4e..a5ad36ca78 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -982,6 +982,15 @@ LWLockWakeup(LWLock *lock) wokeup_somebody = true; } + /* + * Signal that the process isn't on the wait list anymore. This allows + * LWLockDequeueSelf() to remove itself of the waitlist with a + * proclist_delete(), rather than having to check if it has been + * removed from the list. + */ + Assert(waiter->lwWaiting == LW_WS_WAITING); + waiter->lwWaiting = LW_WS_PENDING_WAKEUP; + /* * Once we've woken up an exclusive lock, there's no point in waking * up anybody else. @@ -1039,7 +1048,7 @@ LWLockWakeup(LWLock *lock) * another lock. */ pg_write_barrier(); - waiter->lwWaiting = false; + waiter->lwWaiting = LW_WS_NOT_WAITING; PGSemaphoreUnlock(waiter->sem); } } @@ -1060,7 +1069,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) if (MyProc == NULL) elog(PANIC, "cannot wait without a PGPROC structure"); - if (MyProc->lwWaiting) + if (MyProc->lwWaiting != LW_WS_NOT_WAITING) elog(PANIC, "queueing for lock while waiting on another one"); LWLockWaitListLock(lock); @@ -1068,7 +1077,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) /* setting the flag is protected by the spinlock */ pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_HAS_WAITERS); - MyProc->lwWaiting = true; + MyProc->lwWaiting = LW_WS_WAITING; MyProc->lwWaitMode = mode; /* LW_WAIT_UNTIL_FREE waiters are always at the front of the queue */ @@ -1095,8 +1104,7 @@ LWLockQueueSelf(LWLock *lock, LWLockMode mode) static void LWLockDequeueSelf(LWLock *lock) { - bool found = false; - proclist_mutable_iter iter; + bool on_waitlist; #ifdef LWLOCK_STATS lwlock_stats *lwstats; @@ -1109,18 +1117,13 @@ LWLockDequeueSelf(LWLock *lock) LWLockWaitListLock(lock); /* - * Can't just remove ourselves from the list, but we need to iterate over - * all entries as somebody else could have dequeued us. + * Remove ourselves from the waitlist, unless we've already been + * removed. The removal happens with the wait list lock held, so there's + * no race in this check. */ - proclist_foreach_modify(iter, &lock->waiters, lwWaitLink) - { - if (iter.cur == MyProc->pgprocno) - { - found = true; - proclist_delete(&lock->waiters, iter.cur, lwWaitLink); - break; - } - } + on_waitlist = MyProc->lwWaiting == LW_WS_WAITING; + if (on_waitlist) + proclist_delete(&lock->waiters, MyProc->pgprocno, lwWaitLink); if (proclist_is_empty(&lock->waiters) && (pg_atomic_read_u32(&lock->state) & LW_FLAG_HAS_WAITERS) != 0) @@ -1132,8 +1135,8 @@ LWLockDequeueSelf(LWLock *lock) LWLockWaitListUnlock(lock); /* clear waiting state again, nice for debugging */ - if (found) - MyProc->lwWaiting = false; + if (on_waitlist) + MyProc->lwWaiting = LW_WS_NOT_WAITING; else { int extraWaits = 0; @@ -1157,7 +1160,7 @@ LWLockDequeueSelf(LWLock *lock) for (;;) { PGSemaphoreLock(MyProc->sem); - if (!MyProc->lwWaiting) + if (MyProc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1308,7 +1311,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1473,7 +1476,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1689,7 +1692,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval) for (;;) { PGSemaphoreLock(proc->sem); - if (!proc->lwWaiting) + if (proc->lwWaiting == LW_WS_NOT_WAITING) break; extraWaits++; } @@ -1767,6 +1770,10 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val) proclist_delete(&lock->waiters, iter.cur, lwWaitLink); proclist_push_tail(&wakeup, iter.cur, lwWaitLink); + + /* see LWLockWakeup() */ + Assert(waiter->lwWaiting == LW_WS_WAITING); + waiter->lwWaiting = LW_WS_PENDING_WAKEUP; } /* We are done updating shared state of the lock itself. */ @@ -1782,7 +1789,7 @@ LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val) proclist_delete(&wakeup, iter.cur, lwWaitLink); /* check comment in LWLockWakeup() about this barrier */ pg_write_barrier(); - waiter->lwWaiting = false; + waiter->lwWaiting = LW_WS_NOT_WAITING; PGSemaphoreUnlock(waiter->sem); } } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 0aa304c835..b1c35653fc 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -397,7 +397,7 @@ InitProcess(void) /* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */ if (IsAutoVacuumWorkerProcess()) MyProc->statusFlags |= PROC_IS_AUTOVACUUM; - MyProc->lwWaiting = false; + MyProc->lwWaiting = LW_WS_NOT_WAITING; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; @@ -579,7 +579,7 @@ InitAuxiliaryProcess(void) MyProc->isBackgroundWorker = IsBackgroundWorker; MyProc->delayChkptFlags = 0; MyProc->statusFlags = 0; - MyProc->lwWaiting = false; + MyProc->lwWaiting = LW_WS_NOT_WAITING; MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index ca4eca76f4..a494cb598f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -23,6 +23,14 @@ struct PGPROC; +/* what state of the wait process is a backend in */ +typedef enum LWLockWaitState +{ + LW_WS_NOT_WAITING, /* not currently waiting / woken up */ + LW_WS_WAITING, /* currently waiting */ + LW_WS_PENDING_WAKEUP, /* removed from waitlist, but not yet signalled */ +} LWLockWaitState; + /* * Code outside of lwlock.c should not manipulate the contents of this * structure directly, but we have to declare it here to allow LWLocks to be diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 8d096fdeeb..aa13e1d66e 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -217,7 +217,7 @@ struct PGPROC bool recoveryConflictPending; /* Info about LWLock the process is currently waiting for, if any. */ - bool lwWaiting; /* true if waiting for an LW lock */ + uint8 lwWaiting; /* see LWLockWaitState */ uint8 lwWaitMode; /* lwlock mode being waited for */ proclist_node lwWaitLink; /* position in LW lock wait list */