From 46d6e5f567906389c31c4fb3a2653da1885c18ee Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Mon, 15 Feb 2021 15:13:37 +0900 Subject: [PATCH] Display the time when the process started waiting for the lock, in pg_locks, take 2 This commit adds new column "waitstart" into pg_locks view. This column reports the time when the server process started waiting for the lock if the lock is not held. This information is useful, for example, when examining the amount of time to wait on a lock by subtracting "waitstart" in pg_locks from the current time, and identify the lock that the processes are waiting for very long. This feature uses the current time obtained for the deadlock timeout timer as "waitstart" (i.e., the time when this process started waiting for the lock). Since getting the current time newly can cause overhead, we reuse the already-obtained time to avoid that overhead. Note that "waitstart" is updated without holding the lock table's partition lock, to avoid the overhead by additional lock acquisition. This can cause "waitstart" in pg_locks to become NULL for a very short period of time after the wait started even though "granted" is false. This is OK in practice because we can assume that users are likely to look at "waitstart" when waiting for the lock for a long time. The first attempt of this patch (commit 3b733fcd04) caused the buildfarm member "rorqual" (built with --disable-atomics --disable-spinlocks) to report the failure of the regression test. It was reverted by commit 890d2182a2. The cause of this failure was that the atomic variable for "waitstart" in the dummy process entry created at the end of prepare transaction was not initialized. This second attempt fixes that issue. Bump catalog version. Author: Atsushi Torikoshi Reviewed-by: Ian Lawrence Barwick, Robert Haas, Justin Pryzby, Fujii Masao Discussion: https://postgr.es/m/a96013dc51cdc56b2a2b84fa8a16a993@oss.nttdata.com --- contrib/amcheck/expected/check_btree.out | 4 ++-- doc/src/sgml/catalogs.sgml | 13 +++++++++++++ src/backend/access/transam/twophase.c | 9 ++++++++- src/backend/storage/ipc/standby.c | 24 +++++++++++++++++++++++- src/backend/storage/lmgr/lock.c | 8 ++++++++ src/backend/storage/lmgr/proc.c | 20 ++++++++++++++++++++ src/backend/utils/adt/lockfuncs.c | 9 ++++++++- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_proc.dat | 6 +++--- src/include/storage/lock.h | 3 +++ src/include/storage/proc.h | 2 ++ src/test/regress/expected/rules.out | 5 +++-- 12 files changed, 94 insertions(+), 11 deletions(-) diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out index 13848b7449..5a3f1ef737 100644 --- a/contrib/amcheck/expected/check_btree.out +++ b/contrib/amcheck/expected/check_btree.out @@ -97,8 +97,8 @@ SELECT bt_index_parent_check('bttest_b_idx'); SELECT * FROM pg_locks WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx']::regclass[]) AND pid = pg_backend_pid(); - locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath -----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+---------- + locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath | waitstart +----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+----------+----------- (0 rows) COMMIT; diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 692ad65de2..db29905e91 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -10605,6 +10605,19 @@ SCRAM-SHA-256$<iteration count>:&l lock table + + + + waitstart timestamptz + + + Time when the server process started waiting for this lock, + or null if the lock is held. + Note that this can be null for a very short period of time after + the wait started even though granted + is false. + + diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index fc18b77832..70d22577ce 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -873,8 +873,15 @@ PGPROC * TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) { GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + PGPROC *dummy = &ProcGlobal->allProcs[gxact->pgprocno]; - return &ProcGlobal->allProcs[gxact->pgprocno]; + /* + * Initialize atomic variable in dummy proc so that GetLockStatusData() + * can read it later. + */ + pg_atomic_init_u64(&dummy->waitStart, 0); + + return dummy; } /************************************************************************/ diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 39a30c00f7..5877a60715 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -540,12 +540,34 @@ void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict) { TimestampTz ltime; + TimestampTz now; Assert(InHotStandby); ltime = GetStandbyLimitTime(); + now = GetCurrentTimestamp(); - if (GetCurrentTimestamp() >= ltime && ltime != 0) + /* + * Update waitStart if first time through after the startup process + * started waiting for the lock. It should not be updated every time + * ResolveRecoveryConflictWithLock() is called during the wait. + * + * Use the current time obtained for comparison with ltime as waitStart + * (i.e., the time when this process started waiting for the lock). Since + * getting the current time newly can cause overhead, we reuse the + * already-obtained time to avoid that overhead. + * + * Note that waitStart is updated without holding the lock table's + * partition lock, to avoid the overhead by additional lock acquisition. + * This can cause "waitstart" in pg_locks to become NULL for a very short + * period of time after the wait started even though "granted" is false. + * This is OK in practice because we can assume that users are likely to + * look at "waitstart" when waiting for the lock for a long time. + */ + if (pg_atomic_read_u64(&MyProc->waitStart) == 0) + pg_atomic_write_u64(&MyProc->waitStart, now); + + if (now >= ltime && ltime != 0) { /* * We're already behind, so clear a path as quickly as possible. diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 79c1cf9b8b..108b4d9023 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -3619,6 +3619,12 @@ GetLockStatusData(void) instance->leaderPid = proc->pid; instance->fastpath = true; + /* + * Successfully taking fast path lock means there were no + * conflicting locks. + */ + instance->waitStart = 0; + el++; } @@ -3646,6 +3652,7 @@ GetLockStatusData(void) instance->pid = proc->pid; instance->leaderPid = proc->pid; instance->fastpath = true; + instance->waitStart = 0; el++; } @@ -3698,6 +3705,7 @@ GetLockStatusData(void) instance->pid = proc->pid; instance->leaderPid = proclock->groupLeader->pid; instance->fastpath = false; + instance->waitStart = (TimestampTz) pg_atomic_read_u64(&proc->waitStart); el++; } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index c87ffc6549..0884909a22 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -402,6 +402,7 @@ InitProcess(void) MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; + pg_atomic_init_u64(&MyProc->waitStart, 0); #ifdef USE_ASSERT_CHECKING { int i; @@ -580,6 +581,7 @@ InitAuxiliaryProcess(void) MyProc->lwWaitMode = 0; MyProc->waitLock = NULL; MyProc->waitProcLock = NULL; + pg_atomic_init_u64(&MyProc->waitStart, 0); #ifdef USE_ASSERT_CHECKING { int i; @@ -1262,6 +1264,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) } else enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); + + /* + * Use the current time obtained for the deadlock timeout timer as + * waitStart (i.e., the time when this process started waiting for the + * lock). Since getting the current time newly can cause overhead, we + * reuse the already-obtained time to avoid that overhead. + * + * Note that waitStart is updated without holding the lock table's + * partition lock, to avoid the overhead by additional lock + * acquisition. This can cause "waitstart" in pg_locks to become NULL + * for a very short period of time after the wait started even though + * "granted" is false. This is OK in practice because we can assume + * that users are likely to look at "waitstart" when waiting for the + * lock for a long time. + */ + pg_atomic_write_u64(&MyProc->waitStart, + get_timeout_start_time(DEADLOCK_TIMEOUT)); } else if (log_recovery_conflict_waits) { @@ -1678,6 +1697,7 @@ ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus) proc->waitLock = NULL; proc->waitProcLock = NULL; proc->waitStatus = waitStatus; + pg_atomic_write_u64(&MyProc->waitStart, 0); /* And awaken it */ SetLatch(&proc->procLatch); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index b1cf5b79a7..97f0265c12 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -63,7 +63,7 @@ typedef struct } PG_Lock_Status; /* Number of columns in pg_locks output */ -#define NUM_LOCK_STATUS_COLUMNS 15 +#define NUM_LOCK_STATUS_COLUMNS 16 /* * VXIDGetDatum - Construct a text representation of a VXID @@ -142,6 +142,8 @@ pg_lock_status(PG_FUNCTION_ARGS) BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart", + TIMESTAMPTZOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); @@ -336,6 +338,10 @@ pg_lock_status(PG_FUNCTION_ARGS) values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[13] = BoolGetDatum(granted); values[14] = BoolGetDatum(instance->fastpath); + if (!granted && instance->waitStart != 0) + values[15] = TimestampTzGetDatum(instance->waitStart); + else + nulls[15] = true; tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); @@ -406,6 +412,7 @@ pg_lock_status(PG_FUNCTION_ARGS) values[12] = CStringGetTextDatum("SIReadLock"); values[13] = BoolGetDatum(true); values[14] = BoolGetDatum(false); + nulls[15] = true; tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 2efd937e12..ac8b8e7ee8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202102121 +#define CATALOG_VERSION_NO 202102151 #endif diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 4e0c9be58c..1487710d59 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5994,9 +5994,9 @@ { oid => '1371', descr => 'view system lock information', proname => 'pg_lock_status', prorows => '1000', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath}', + proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath,waitstart}', prosrc => 'pg_lock_status' }, { oid => '2561', descr => 'get array of PIDs of sessions blocking specified backend PID from acquiring a heavyweight lock', diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 68a3487d49..9b2a421c32 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -22,6 +22,7 @@ #include "storage/lockdefs.h" #include "storage/lwlock.h" #include "storage/shmem.h" +#include "utils/timestamp.h" /* struct PGPROC is declared in proc.h, but must forward-reference it */ typedef struct PGPROC PGPROC; @@ -446,6 +447,8 @@ typedef struct LockInstanceData LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */ BackendId backend; /* backend ID of this PGPROC */ LocalTransactionId lxid; /* local transaction ID of this PGPROC */ + TimestampTz waitStart; /* time at which this PGPROC started waiting + * for lock */ int pid; /* pid of this PGPROC */ int leaderPid; /* pid of group leader; = pid if no group */ bool fastpath; /* taken via fastpath? */ diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 683ab64f76..a777cb64a1 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -181,6 +181,8 @@ struct PGPROC LOCKMODE waitLockMode; /* type of lock we're waiting for */ LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ + pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition + * started */ bool delayChkpt; /* true if this proc delays checkpoint start */ diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index b632d9f2ea..10a1f34ebc 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1394,8 +1394,9 @@ pg_locks| SELECT l.locktype, l.pid, l.mode, l.granted, - l.fastpath - FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath); + l.fastpath, + l.waitstart + FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath, waitstart); pg_matviews| SELECT n.nspname AS schemaname, c.relname AS matviewname, pg_get_userbyid(c.relowner) AS matviewowner,