From 0d3431497dbe91ba1e428ace4e9537c109eaff07 Mon Sep 17 00:00:00 2001
From: Thomas Munro
Date: Wed, 11 May 2022 18:03:03 +1200
Subject: [PATCH] Add logging for excessive ProcSignalBarrier waits.

To enable diagnosis of systems that are not processing ProcSignalBarrier
requests promptly, add a LOG message every 5 seconds if we seem to be
wedged. Although you could already see this state as a wait event in
pg_stat_activity, the log message also shows the PID of the process that
is preventing progress.

Also add DEBUG1 logging around the whole wait loop.

Reviewed-by: Robert Haas
Discussion: https://postgr.es/m/CA%2BTgmoYJ03r5359gQutRGP9BtigYCg3_UskcmnVjBf-QO3-0pQ%40mail.gmail.com
---
 src/backend/storage/ipc/procsignal.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index 00d66902d8..21a9fc0fdd 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -393,6 +393,11 @@ WaitForProcSignalBarrier(uint64 generation)
 {
 	Assert(generation <= pg_atomic_read_u64(&ProcSignal->psh_barrierGeneration));
 
+	elog(DEBUG1,
+		 "waiting for all backends to process ProcSignalBarrier generation "
+		 UINT64_FORMAT,
+		 generation);
+
 	for (int i = NumProcSignalSlots - 1; i >= 0; i--)
 	{
 		ProcSignalSlot *slot = &ProcSignal->psh_slot[i];
@@ -407,13 +412,22 @@ WaitForProcSignalBarrier(uint64 generation)
 		oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration);
 		while (oldval < generation)
 		{
-			ConditionVariableSleep(&slot->pss_barrierCV,
-								   WAIT_EVENT_PROC_SIGNAL_BARRIER);
+			if (ConditionVariableTimedSleep(&slot->pss_barrierCV,
+											5000,
+											WAIT_EVENT_PROC_SIGNAL_BARRIER))
+				ereport(LOG,
+						(errmsg("still waiting for backend with PID %lu to accept ProcSignalBarrier",
+								(unsigned long) slot->pss_pid)));
 			oldval = pg_atomic_read_u64(&slot->pss_barrierGeneration);
 		}
 		ConditionVariableCancelSleep();
 	}
 
+	elog(DEBUG1,
+		 "finished waiting for all backends to process ProcSignalBarrier generation "
+		 UINT64_FORMAT,
+		 generation);
+
 	/*
 	 * The caller is probably calling this function because it wants to read
 	 * the shared state or perform further writes to shared state once all