Add code to print information about a detected deadlock cycle.

The printed data is comparable to what you could read in the pg_locks view,
were you fortunate enough to have been looking at it at the right time.
This commit is contained in:
Tom Lane 2003-01-16 21:01:45 +00:00
parent 136828c699
commit 227a404cf4
4 changed files with 166 additions and 12 deletions

View File

@ -12,11 +12,13 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.15 2002/11/01 00:40:23 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.16 2003/01/16 21:01:44 tgl Exp $
* *
* Interface: * Interface:
* *
* DeadLockCheck() * DeadLockCheck()
* DeadLockReport()
* RememberSimpleDeadLock()
* InitDeadLockChecking() * InitDeadLockChecking()
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
@ -45,12 +47,27 @@ typedef struct
int nProcs; int nProcs;
} WAIT_ORDER; } WAIT_ORDER;
/*
 * Information saved about each edge in a detected deadlock cycle.  This
 * is used to print a diagnostic message upon failure.
 *
 * One entry describes one blocked backend: the lock object it is waiting
 * for, the lock mode it requested, and its PID.  DeadLockReport() walks
 * the entries in array order and reports entry i as blocked by the PID
 * stored in entry i+1, with the last entry blocked by entry 0.
 *
 * Note: because we want to examine this info after releasing the LockMgrLock,
 * we can't just store LOCK and PGPROC pointers; we must extract out all the
 * info we want to be able to print.
 */
typedef struct
{
LOCKTAG locktag; /* tag (identity) of the lock object being awaited */
LOCKMODE lockmode; /* lock mode the blocked backend is waiting to get */
int pid; /* PID of the blocked backend */
} DEADLOCK_INFO;
static bool DeadLockCheckRecurse(PGPROC *proc); static bool DeadLockCheckRecurse(PGPROC *proc);
static bool TestConfiguration(PGPROC *startProc); static bool TestConfiguration(PGPROC *startProc);
static bool FindLockCycle(PGPROC *checkProc, static bool FindLockCycle(PGPROC *checkProc,
EDGE *softEdges, int *nSoftEdges); EDGE *softEdges, int *nSoftEdges);
static bool FindLockCycleRecurse(PGPROC *checkProc, static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
EDGE *softEdges, int *nSoftEdges); EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints); static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints, static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
@ -88,6 +105,8 @@ static int maxCurConstraints;
static EDGE *possibleConstraints; static EDGE *possibleConstraints;
static int nPossibleConstraints; static int nPossibleConstraints;
static int maxPossibleConstraints; static int maxPossibleConstraints;
static DEADLOCK_INFO *deadlockDetails;
static int nDeadlockDetails;
/* /*
@ -110,8 +129,10 @@ InitDeadLockChecking(void)
/* /*
* FindLockCycle needs at most MaxBackends entries in visitedProcs[] * FindLockCycle needs at most MaxBackends entries in visitedProcs[]
* and deadlockDetails[].
*/ */
visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *)); visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
/* /*
* TopoSort needs to consider at most MaxBackends wait-queue entries, * TopoSort needs to consider at most MaxBackends wait-queue entries,
@ -176,6 +197,10 @@ InitDeadLockChecking(void)
* table to have a different masterLock, all locks that can block had * table to have a different masterLock, all locks that can block had
* better use the same LWLock, else this code will not be adequately * better use the same LWLock, else this code will not be adequately
* interlocked! * interlocked!
*
* On failure, deadlock details are recorded in deadlockDetails[] for
* subsequent printing by DeadLockReport(). That activity is separate
* because we don't want to do it while holding the master lock.
*/ */
bool bool
DeadLockCheck(PGPROC *proc) DeadLockCheck(PGPROC *proc)
@ -190,7 +215,19 @@ DeadLockCheck(PGPROC *proc)
/* Search for deadlocks and possible fixes */ /* Search for deadlocks and possible fixes */
if (DeadLockCheckRecurse(proc)) if (DeadLockCheckRecurse(proc))
{
/*
* Call FindLockCycle one more time, to record the correct
* deadlockDetails[] for the basic state with no rearrangements.
*/
int nSoftEdges;
nWaitOrders = 0;
if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
elog(FATAL, "DeadLockCheck: deadlock seems to have disappeared");
return true; /* cannot find a non-deadlocked state */ return true; /* cannot find a non-deadlocked state */
}
/* Apply any needed rearrangements of wait queues */ /* Apply any needed rearrangements of wait queues */
for (i = 0; i < nWaitOrders; i++) for (i = 0; i < nWaitOrders; i++)
@ -357,9 +394,12 @@ TestConfiguration(PGPROC *startProc)
* *
* Scan outward from the given proc to see if there is a cycle in the * Scan outward from the given proc to see if there is a cycle in the
* waits-for graph that includes this proc. Return TRUE if a cycle * waits-for graph that includes this proc. Return TRUE if a cycle
* is found, else FALSE. If a cycle is found, we also return a list of * is found, else FALSE. If a cycle is found, we return a list of
* the "soft edges", if any, included in the cycle. These edges could * the "soft edges", if any, included in the cycle. These edges could
* potentially be eliminated by rearranging wait queues. * potentially be eliminated by rearranging wait queues. We also fill
* deadlockDetails[] with information about the detected cycle; this info
* is not used by the deadlock algorithm itself, only to print a useful
* message after failing.
* *
* Since we need to be able to check hypothetical configurations that would * Since we need to be able to check hypothetical configurations that would
* exist after wait queue rearrangement, the routine pays attention to the * exist after wait queue rearrangement, the routine pays attention to the
@ -372,12 +412,14 @@ FindLockCycle(PGPROC *checkProc,
int *nSoftEdges) /* output argument */ int *nSoftEdges) /* output argument */
{ {
nVisitedProcs = 0; nVisitedProcs = 0;
nDeadlockDetails = 0;
*nSoftEdges = 0; *nSoftEdges = 0;
return FindLockCycleRecurse(checkProc, softEdges, nSoftEdges); return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
} }
static bool static bool
FindLockCycleRecurse(PGPROC *checkProc, FindLockCycleRecurse(PGPROC *checkProc,
int depth,
EDGE *softEdges, /* output argument */ EDGE *softEdges, /* output argument */
int *nSoftEdges) /* output argument */ int *nSoftEdges) /* output argument */
{ {
@ -402,7 +444,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
{ {
/* If we return to starting point, we have a deadlock cycle */ /* If we return to starting point, we have a deadlock cycle */
if (i == 0) if (i == 0)
{
/*
* record total length of cycle --- outer levels will now
* fill deadlockDetails[]
*/
Assert(depth <= MaxBackends);
nDeadlockDetails = depth;
return true; return true;
}
/* /*
* Otherwise, we have a cycle but it does not include the * Otherwise, we have a cycle but it does not include the
@ -449,8 +500,18 @@ FindLockCycleRecurse(PGPROC *checkProc,
((1 << lm) & conflictMask) != 0) ((1 << lm) & conflictMask) != 0)
{ {
/* This proc hard-blocks checkProc */ /* This proc hard-blocks checkProc */
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges)) if (FindLockCycleRecurse(proc, depth+1,
softEdges, nSoftEdges))
{
/* fill deadlockDetails[] */
DEADLOCK_INFO *info = &deadlockDetails[depth];
info->locktag = lock->tag;
info->lockmode = checkProc->waitLockMode;
info->pid = checkProc->pid;
return true; return true;
}
/* If no deadlock, we're done looking at this holder */ /* If no deadlock, we're done looking at this holder */
break; break;
} }
@ -496,8 +557,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
if (((1 << proc->waitLockMode) & conflictMask) != 0) if (((1 << proc->waitLockMode) & conflictMask) != 0)
{ {
/* This proc soft-blocks checkProc */ /* This proc soft-blocks checkProc */
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges)) if (FindLockCycleRecurse(proc, depth+1,
softEdges, nSoftEdges))
{ {
/* fill deadlockDetails[] */
DEADLOCK_INFO *info = &deadlockDetails[depth];
info->locktag = lock->tag;
info->lockmode = checkProc->waitLockMode;
info->pid = checkProc->pid;
/* /*
* Add this edge to the list of soft edges in the * Add this edge to the list of soft edges in the
* cycle * cycle
@ -529,8 +598,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
if (((1 << proc->waitLockMode) & conflictMask) != 0) if (((1 << proc->waitLockMode) & conflictMask) != 0)
{ {
/* This proc soft-blocks checkProc */ /* This proc soft-blocks checkProc */
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges)) if (FindLockCycleRecurse(proc, depth+1,
softEdges, nSoftEdges))
{ {
/* fill deadlockDetails[] */
DEADLOCK_INFO *info = &deadlockDetails[depth];
info->locktag = lock->tag;
info->lockmode = checkProc->waitLockMode;
info->pid = checkProc->pid;
/* /*
* Add this edge to the list of soft edges in the * Add this edge to the list of soft edges in the
* cycle * cycle
@ -758,3 +835,67 @@ PrintLockQueue(LOCK *lock, const char *info)
} }
#endif #endif
/*
 * DeadLockReport -- emit one NOTICE per edge of the recorded deadlock cycle.
 *
 * Reads the first nDeadlockDetails entries of deadlockDetails[].  Each
 * entry names a waiting backend; the backend blocking it is taken to be
 * the one described by the following entry, and the final entry wraps
 * around to entry 0 to close the cycle.
 *
 * The message wording depends on the lock tag: a tag whose relId is
 * XactLockTableId and whose dbId is 0 is reported as a transaction lock,
 * anything else as a relation lock.
 */
void
DeadLockReport(void)
{
	int			edge;

	for (edge = 0; edge < nDeadlockDetails; edge++)
	{
		const DEADLOCK_INFO *waiter = &deadlockDetails[edge];
		/* successor in the cycle; the last waiter is blocked by the first */
		int			blockerIdx = (edge + 1 < nDeadlockDetails) ? edge + 1 : 0;
		int			blockerPid = deadlockDetails[blockerIdx].pid;
		const char *modeName = GetLockmodeName(waiter->lockmode);

		if (waiter->locktag.relId != XactLockTableId ||
			waiter->locktag.dbId != 0)
		{
			/* Ordinary relation lock */
			elog(NOTICE, "Proc %d waits for %s on relation %u database %u; blocked by %d",
				 waiter->pid,
				 modeName,
				 waiter->locktag.relId,
				 waiter->locktag.dbId,
				 blockerPid);
			continue;
		}

		/* Otherwise the lock is on a transaction ID */
		elog(NOTICE, "Proc %d waits for %s on transaction %u; blocked by %d",
			 waiter->pid,
			 modeName,
			 waiter->locktag.objId.xid,
			 blockerPid);
	}
}
/*
 * RememberSimpleDeadLock -- record a trivial two-backend deadlock so that
 * DeadLockReport can describe it later.
 *
 * Intended for the case where ProcSleep notices a two-way deadlock:
 * proc1 wants to block for lockmode on lock, but proc2 is already waiting
 * and would be blocked by proc1.
 *
 * Slot 0 of deadlockDetails[] gets proc1's pending request (described by
 * the lock and lockmode arguments); slot 1 gets what proc2 is currently
 * waiting for, read from proc2 itself.  nDeadlockDetails is set to 2.
 */
void
RememberSimpleDeadLock(PGPROC *proc1,
					   LOCKMODE lockmode,
					   LOCK *lock,
					   PGPROC *proc2)
{
	DEADLOCK_INFO *requester = &deadlockDetails[0];
	DEADLOCK_INFO *sleeper = &deadlockDetails[1];

	/* proc1: the request that would complete the cycle */
	requester->pid = proc1->pid;
	requester->lockmode = lockmode;
	requester->locktag = lock->tag;

	/* proc2: already asleep on its own awaited lock */
	sleeper->pid = proc2->pid;
	sleeper->lockmode = proc2->waitLockMode;
	sleeper->locktag = proc2->waitLock->tag;

	nDeadlockDetails = 2;
}

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.118 2002/11/01 00:40:23 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.119 2003/01/16 21:01:44 tgl Exp $
* *
* NOTES * NOTES
* Outside modules can create a lock table and acquire/release * Outside modules can create a lock table and acquire/release
@ -905,6 +905,13 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
*/ */
LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode); LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode);
LWLockRelease(lockMethodTable->masterLock); LWLockRelease(lockMethodTable->masterLock);
/*
* Now that we aren't holding the LockMgrLock, print details about
* the detected deadlock. We didn't want to do this before because
* sending elog messages to the client while holding the shared lock
* is bad for concurrency.
*/
DeadLockReport();
elog(ERROR, "deadlock detected"); elog(ERROR, "deadlock detected");
/* not reached */ /* not reached */
} }

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.127 2002/10/31 21:34:16 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.128 2003/01/16 21:01:44 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -566,8 +566,9 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
* up correctly is to call RemoveFromWaitQueue(), but * up correctly is to call RemoveFromWaitQueue(), but
* we can't do that until we are *on* the wait queue. * we can't do that until we are *on* the wait queue.
* So, set a flag to check below, and break out of * So, set a flag to check below, and break out of
* loop. * loop. Also, record deadlock info for later message.
*/ */
RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
early_deadlock = true; early_deadlock = true;
break; break;
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: lock.h,v 1.67 2002/09/04 20:31:45 momjian Exp $ * $Id: lock.h,v 1.68 2003/01/16 21:01:45 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -243,6 +243,11 @@ extern void GrantLock(LOCK *lock, PROCLOCK *holder, LOCKMODE lockmode);
extern void RemoveFromWaitQueue(PGPROC *proc); extern void RemoveFromWaitQueue(PGPROC *proc);
extern int LockShmemSize(int maxBackends); extern int LockShmemSize(int maxBackends);
extern bool DeadLockCheck(PGPROC *proc); extern bool DeadLockCheck(PGPROC *proc);
extern void DeadLockReport(void);
extern void RememberSimpleDeadLock(PGPROC *proc1,
LOCKMODE lockmode,
LOCK *lock,
PGPROC *proc2);
extern void InitDeadLockChecking(void); extern void InitDeadLockChecking(void);
extern LockData *GetLockStatusData(void); extern LockData *GetLockStatusData(void);
extern const char *GetLockmodeName(LOCKMODE mode); extern const char *GetLockmodeName(LOCKMODE mode);