Skip to content

Commit 227a404

Browse files
committed
Add code to print information about a detected deadlock cycle. The
printed data is comparable to what you could read in the pg_locks view, were you fortunate enough to have been looking at it at the right time.
1 parent 136828c commit 227a404

File tree

4 files changed

+166
-12
lines changed

4 files changed

+166
-12
lines changed

src/backend/storage/lmgr/deadlock.c

Lines changed: 149 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212
*
1313
*
1414
* IDENTIFICATION
15-
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.15 2002/11/01 00:40:23 tgl Exp $
15+
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/deadlock.c,v 1.16 2003/01/16 21:01:44 tgl Exp $
1616
*
1717
* Interface:
1818
*
1919
* DeadLockCheck()
20+
* DeadLockReport()
21+
* RememberSimpleDeadLock()
2022
* InitDeadLockChecking()
2123
*
2224
*-------------------------------------------------------------------------
@@ -45,12 +47,27 @@ typedef struct
4547
int nProcs;
4648
} WAIT_ORDER;
4749

50+
/*
51+
* Information saved about each edge in a detected deadlock cycle. This
52+
* is used to print a diagnostic message upon failure.
53+
*
54+
* Note: because we want to examine this info after releasing the LockMgrLock,
55+
* we can't just store LOCK and PGPROC pointers; we must extract out all the
56+
* info we want to be able to print.
57+
*/
58+
typedef struct
59+
{
60+
LOCKTAG locktag; /* ID of awaited lock object */
61+
LOCKMODE lockmode; /* type of lock we're waiting for */
62+
int pid; /* PID of blocked backend */
63+
} DEADLOCK_INFO;
64+
4865

4966
static bool DeadLockCheckRecurse(PGPROC *proc);
5067
static bool TestConfiguration(PGPROC *startProc);
5168
static bool FindLockCycle(PGPROC *checkProc,
5269
EDGE *softEdges, int *nSoftEdges);
53-
static bool FindLockCycleRecurse(PGPROC *checkProc,
70+
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
5471
EDGE *softEdges, int *nSoftEdges);
5572
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
5673
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
@@ -88,6 +105,8 @@ static int maxCurConstraints;
88105
static EDGE *possibleConstraints;
89106
static int nPossibleConstraints;
90107
static int maxPossibleConstraints;
108+
static DEADLOCK_INFO *deadlockDetails;
109+
static int nDeadlockDetails;
91110

92111

93112
/*
@@ -110,8 +129,10 @@ InitDeadLockChecking(void)
110129

111130
/*
112131
* FindLockCycle needs at most MaxBackends entries in visitedProcs[]
132+
* and deadlockDetails[].
113133
*/
114134
visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
135+
deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
115136

116137
/*
117138
* TopoSort needs to consider at most MaxBackends wait-queue entries,
@@ -176,6 +197,10 @@ InitDeadLockChecking(void)
176197
* table to have a different masterLock, all locks that can block had
177198
* better use the same LWLock, else this code will not be adequately
178199
* interlocked!
200+
*
201+
* On failure, deadlock details are recorded in deadlockDetails[] for
202+
* subsequent printing by DeadLockReport(). That activity is separate
203+
* because we don't want to do it while holding the master lock.
179204
*/
180205
bool
181206
DeadLockCheck(PGPROC *proc)
@@ -190,7 +215,19 @@ DeadLockCheck(PGPROC *proc)
190215

191216
/* Search for deadlocks and possible fixes */
192217
if (DeadLockCheckRecurse(proc))
218+
{
219+
/*
220+
* Call FindLockCycle one more time, to record the correct
221+
* deadlockDetails[] for the basic state with no rearrangements.
222+
*/
223+
int nSoftEdges;
224+
225+
nWaitOrders = 0;
226+
if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
227+
elog(FATAL, "DeadLockCheck: deadlock seems to have disappeared");
228+
193229
return true; /* cannot find a non-deadlocked state */
230+
}
194231

195232
/* Apply any needed rearrangements of wait queues */
196233
for (i = 0; i < nWaitOrders; i++)
@@ -357,9 +394,12 @@ TestConfiguration(PGPROC *startProc)
357394
*
358395
* Scan outward from the given proc to see if there is a cycle in the
359396
* waits-for graph that includes this proc. Return TRUE if a cycle
360-
* is found, else FALSE. If a cycle is found, we also return a list of
397+
* is found, else FALSE. If a cycle is found, we return a list of
361398
* the "soft edges", if any, included in the cycle. These edges could
362-
* potentially be eliminated by rearranging wait queues.
399+
* potentially be eliminated by rearranging wait queues. We also fill
400+
* deadlockDetails[] with information about the detected cycle; this info
401+
* is not used by the deadlock algorithm itself, only to print a useful
402+
* message after failing.
363403
*
364404
* Since we need to be able to check hypothetical configurations that would
365405
* exist after wait queue rearrangement, the routine pays attention to the
@@ -372,12 +412,14 @@ FindLockCycle(PGPROC *checkProc,
372412
int *nSoftEdges) /* output argument */
373413
{
374414
nVisitedProcs = 0;
415+
nDeadlockDetails = 0;
375416
*nSoftEdges = 0;
376-
return FindLockCycleRecurse(checkProc, softEdges, nSoftEdges);
417+
return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
377418
}
378419

379420
static bool
380421
FindLockCycleRecurse(PGPROC *checkProc,
422+
int depth,
381423
EDGE *softEdges, /* output argument */
382424
int *nSoftEdges) /* output argument */
383425
{
@@ -402,7 +444,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
402444
{
403445
/* If we return to starting point, we have a deadlock cycle */
404446
if (i == 0)
447+
{
448+
/*
449+
* record total length of cycle --- outer levels will now
450+
* fill deadlockDetails[]
451+
*/
452+
Assert(depth <= MaxBackends);
453+
nDeadlockDetails = depth;
454+
405455
return true;
456+
}
406457

407458
/*
408459
* Otherwise, we have a cycle but it does not include the
@@ -449,8 +500,18 @@ FindLockCycleRecurse(PGPROC *checkProc,
449500
((1 << lm) & conflictMask) != 0)
450501
{
451502
/* This proc hard-blocks checkProc */
452-
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
503+
if (FindLockCycleRecurse(proc, depth+1,
504+
softEdges, nSoftEdges))
505+
{
506+
/* fill deadlockDetails[] */
507+
DEADLOCK_INFO *info = &deadlockDetails[depth];
508+
509+
info->locktag = lock->tag;
510+
info->lockmode = checkProc->waitLockMode;
511+
info->pid = checkProc->pid;
512+
453513
return true;
514+
}
454515
/* If no deadlock, we're done looking at this holder */
455516
break;
456517
}
@@ -496,8 +557,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
496557
if (((1 << proc->waitLockMode) & conflictMask) != 0)
497558
{
498559
/* This proc soft-blocks checkProc */
499-
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
560+
if (FindLockCycleRecurse(proc, depth+1,
561+
softEdges, nSoftEdges))
500562
{
563+
/* fill deadlockDetails[] */
564+
DEADLOCK_INFO *info = &deadlockDetails[depth];
565+
566+
info->locktag = lock->tag;
567+
info->lockmode = checkProc->waitLockMode;
568+
info->pid = checkProc->pid;
569+
501570
/*
502571
* Add this edge to the list of soft edges in the
503572
* cycle
@@ -529,8 +598,16 @@ FindLockCycleRecurse(PGPROC *checkProc,
529598
if (((1 << proc->waitLockMode) & conflictMask) != 0)
530599
{
531600
/* This proc soft-blocks checkProc */
532-
if (FindLockCycleRecurse(proc, softEdges, nSoftEdges))
601+
if (FindLockCycleRecurse(proc, depth+1,
602+
softEdges, nSoftEdges))
533603
{
604+
/* fill deadlockDetails[] */
605+
DEADLOCK_INFO *info = &deadlockDetails[depth];
606+
607+
info->locktag = lock->tag;
608+
info->lockmode = checkProc->waitLockMode;
609+
info->pid = checkProc->pid;
610+
534611
/*
535612
* Add this edge to the list of soft edges in the
536613
* cycle
@@ -758,3 +835,67 @@ PrintLockQueue(LOCK *lock, const char *info)
758835
}
759836

760837
#endif
838+
839+
/*
840+
* Report details about a detected deadlock.
841+
*/
842+
void
843+
DeadLockReport(void)
844+
{
845+
int i;
846+
847+
for (i = 0; i < nDeadlockDetails; i++)
848+
{
849+
DEADLOCK_INFO *info = &deadlockDetails[i];
850+
int nextpid;
851+
852+
/* The last proc waits for the first one... */
853+
if (i < nDeadlockDetails-1)
854+
nextpid = info[1].pid;
855+
else
856+
nextpid = deadlockDetails[0].pid;
857+
858+
if (info->locktag.relId == XactLockTableId && info->locktag.dbId == 0)
859+
{
860+
/* Lock is for transaction ID */
861+
elog(NOTICE, "Proc %d waits for %s on transaction %u; blocked by %d",
862+
info->pid,
863+
GetLockmodeName(info->lockmode),
864+
info->locktag.objId.xid,
865+
nextpid);
866+
}
867+
else
868+
{
869+
/* Lock is for a relation */
870+
elog(NOTICE, "Proc %d waits for %s on relation %u database %u; blocked by %d",
871+
info->pid,
872+
GetLockmodeName(info->lockmode),
873+
info->locktag.relId,
874+
info->locktag.dbId,
875+
nextpid);
876+
}
877+
}
878+
}
879+
880+
/*
881+
* RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
882+
* detects a trivial (two-way) deadlock. proc1 wants to block for lockmode
883+
* on lock, but proc2 is already waiting and would be blocked by proc1.
884+
*/
885+
void
886+
RememberSimpleDeadLock(PGPROC *proc1,
887+
LOCKMODE lockmode,
888+
LOCK *lock,
889+
PGPROC *proc2)
890+
{
891+
DEADLOCK_INFO *info = &deadlockDetails[0];
892+
893+
info->locktag = lock->tag;
894+
info->lockmode = lockmode;
895+
info->pid = proc1->pid;
896+
info++;
897+
info->locktag = proc2->waitLock->tag;
898+
info->lockmode = proc2->waitLockMode;
899+
info->pid = proc2->pid;
900+
nDeadlockDetails = 2;
901+
}

src/backend/storage/lmgr/lock.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.118 2002/11/01 00:40:23 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lock.c,v 1.119 2003/01/16 21:01:44 tgl Exp $
1212
*
1313
* NOTES
1414
* Outside modules can create a lock table and acquire/release
@@ -905,6 +905,13 @@ WaitOnLock(LOCKMETHOD lockmethod, LOCKMODE lockmode,
905905
*/
906906
LOCK_PRINT("WaitOnLock: aborting on lock", lock, lockmode);
907907
LWLockRelease(lockMethodTable->masterLock);
908+
/*
909+
* Now that we aren't holding the LockMgrLock, print details about
910+
* the detected deadlock. We didn't want to do this before because
911+
* sending elog messages to the client while holding the shared lock
912+
* is bad for concurrency.
913+
*/
914+
DeadLockReport();
908915
elog(ERROR, "deadlock detected");
909916
/* not reached */
910917
}

src/backend/storage/lmgr/proc.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.127 2002/10/31 21:34:16 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.128 2003/01/16 21:01:44 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -566,8 +566,9 @@ ProcSleep(LOCKMETHODTABLE *lockMethodTable,
566566
* up correctly is to call RemoveFromWaitQueue(), but
567567
* we can't do that until we are *on* the wait queue.
568568
* So, set a flag to check below, and break out of
569-
* loop.
569+
* loop. Also, record deadlock info for later message.
570570
*/
571+
RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
571572
early_deadlock = true;
572573
break;
573574
}

src/include/storage/lock.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: lock.h,v 1.67 2002/09/04 20:31:45 momjian Exp $
10+
* $Id: lock.h,v 1.68 2003/01/16 21:01:45 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -243,6 +243,11 @@ extern void GrantLock(LOCK *lock, PROCLOCK *holder, LOCKMODE lockmode);
243243
extern void RemoveFromWaitQueue(PGPROC *proc);
244244
extern int LockShmemSize(int maxBackends);
245245
extern bool DeadLockCheck(PGPROC *proc);
246+
extern void DeadLockReport(void);
247+
extern void RememberSimpleDeadLock(PGPROC *proc1,
248+
LOCKMODE lockmode,
249+
LOCK *lock,
250+
PGPROC *proc2);
246251
extern void InitDeadLockChecking(void);
247252
extern LockData *GetLockStatusData(void);
248253
extern const char *GetLockmodeName(LOCKMODE mode);

0 commit comments

Comments
 (0)