Skip to content

Commit e52c4a6

Browse files
committed
Add GUC log_lock_waits to log long wait times.
Simon Riggs
1 parent f52019c commit e52c4a6

File tree

7 files changed

+121
-60
lines changed

7 files changed

+121
-60
lines changed

doc/src/sgml/config.sgml

+21-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.113 2007/03/02 23:37:22 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.114 2007/03/03 18:46:40 momjian Exp $ -->
22

33
<chapter Id="runtime-config">
44
<title>Server Configuration</title>
@@ -2946,6 +2946,21 @@ SELECT * FROM parent WHERE key = 2400;
29462946
</listitem>
29472947
</varlistentry>
29482948

2949+
<varlistentry id="guc-log-lock-waits" xreflabel="log_lock_waits">
2950+
<term><varname>log_lock_waits</varname> (<type>boolean</type>)</term>
2951+
<indexterm>
2952+
<primary><varname>log_lock_waits</> configuration parameter</primary>
2953+
</indexterm>
2954+
<listitem>
2955+
<para>
2956+
Controls whether a log message is produced when a statement waits
2957+
longer than <xref linkend="guc-deadlock-timeout"> to acquire a
2958+
lock. This is useful in determining if lock waits are causing
2959+
poor performance. The default is <literal>off</>.
2960+
</para>
2961+
</listitem>
2962+
</varlistentry>
2963+
29492964
<varlistentry id="guc-log-temp-files" xreflabel="log_temp_files">
29502965
<term><varname>log_temp_files</varname> (<type>integer</type>)</term>
29512966
<indexterm>
@@ -3980,17 +3995,18 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
39803995
This is the amount of time, in milliseconds, to wait on a lock
39813996
before checking to see if there is a deadlock condition. The
39823997
check for deadlock is relatively slow, so the server doesn't run
3983-
it every time it waits for a lock. We (optimistically?) assume
3998+
it every time it waits for a lock. We optimistically assume
39843999
that deadlocks are not common in production applications and
39854000
just wait on the lock for a while before starting the check for a
39864001
deadlock. Increasing this value reduces the amount of time
39874002
wasted in needless deadlock checks, but slows down reporting of
39884003
real deadlock errors. The default is one second (<literal>1s</>),
39894004
which is probably about the smallest value you would want in
3990-
practice. On a heavily loaded server you might want to raise it.
4005+
practice. Set <xref linkend="guc-log-lock-waits"> to log deadlock
4006+
checks. On a heavily loaded server you might want to raise it.
39914007
Ideally the setting should exceed your typical transaction time,
3992-
so as to improve the odds that a lock will be released before
3993-
the waiter decides to check for deadlock.
4008+
so as to improve the odds that a lock will be released before the
4009+
waiter decides to check for deadlock.
39944010
</para>
39954011
</listitem>
39964012
</varlistentry>

src/backend/storage/lmgr/deadlock.c

+16-6
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*
1313
*
1414
* IDENTIFICATION
15-
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.44 2007/01/05 22:19:38 momjian Exp $
15+
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.45 2007/03/03 18:46:40 momjian Exp $
1616
*
1717
* Interface:
1818
*
@@ -187,13 +187,14 @@ InitDeadLockChecking(void)
187187
* deadlock. If resolution is impossible, return TRUE --- the caller
188188
* is then expected to abort the given proc's transaction.
189189
*
190-
* Caller must already have locked all partitions of the lock tables.
190+
* Caller must already have locked all partitions of the lock tables,
191+
* so standard error logging/reporting code is handled by caller.
191192
*
192193
* On failure, deadlock details are recorded in deadlockDetails[] for
193194
* subsequent printing by DeadLockReport(). That activity is separate
194195
* because we don't want to do it while holding all those LWLocks.
195196
*/
196-
bool
197+
DeadlockState
197198
DeadLockCheck(PGPROC *proc)
198199
{
199200
int i,
@@ -204,6 +205,11 @@ DeadLockCheck(PGPROC *proc)
204205
nPossibleConstraints = 0;
205206
nWaitOrders = 0;
206207

208+
#ifdef LOCK_DEBUG
209+
if (Debug_deadlocks)
210+
DumpAllLocks();
211+
#endif
212+
207213
/* Search for deadlocks and possible fixes */
208214
if (DeadLockCheckRecurse(proc))
209215
{
@@ -217,7 +223,7 @@ DeadLockCheck(PGPROC *proc)
217223
if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
218224
elog(FATAL, "deadlock seems to have disappeared");
219225

220-
return true; /* cannot find a non-deadlocked state */
226+
return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */
221227
}
222228

223229
/* Apply any needed rearrangements of wait queues */
@@ -249,7 +255,11 @@ DeadLockCheck(PGPROC *proc)
249255
/* See if any waiters for the lock can be woken up now */
250256
ProcLockWakeup(GetLocksMethodTable(lock), lock);
251257
}
252-
return false;
258+
259+
if (nWaitOrders > 0)
260+
return DS_SOFT_DEADLOCK;
261+
else
262+
return DS_DEADLOCK_NOT_FOUND;
253263
}
254264

255265
/*
@@ -896,7 +906,7 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *lock)
896906
}
897907

898908
/*
899-
* Report a detected deadlock, with available details.
909+
* Report a detected DS_HARD_DEADLOCK, with available details.
900910
*/
901911
void
902912
DeadLockReport(void)

src/backend/storage/lmgr/proc.c

+63-44
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.184 2007/02/15 23:23:23 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.185 2007/03/03 18:46:40 momjian Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -48,6 +48,7 @@
4848
/* GUC variables */
4949
int DeadlockTimeout = 1000;
5050
int StatementTimeout = 0;
51+
bool log_lock_waits = false;
5152

5253
/* Pointer to this process's PGPROC struct, if any */
5354
PGPROC *MyProc = NULL;
@@ -979,6 +980,7 @@ static void
979980
CheckDeadLock(void)
980981
{
981982
int i;
983+
DeadlockState deadlock_state = DS_DEADLOCK_NOT_FOUND;
982984

983985
/*
984986
* Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1004,60 +1006,77 @@ CheckDeadLock(void)
10041006
* This is quicker than checking our semaphore's state, since no kernel
10051007
* call is needed, and it is safe because we hold the lock partition lock.
10061008
*/
1007-
if (MyProc->links.prev == INVALID_OFFSET ||
1008-
MyProc->links.next == INVALID_OFFSET)
1009-
goto check_done;
1010-
1011-
#ifdef LOCK_DEBUG
1012-
if (Debug_deadlocks)
1013-
DumpAllLocks();
1014-
#endif
1015-
1016-
if (!DeadLockCheck(MyProc))
1009+
if (MyProc->links.prev != INVALID_OFFSET &&
1010+
MyProc->links.next != INVALID_OFFSET)
1011+
deadlock_state = DeadLockCheck(MyProc);
1012+
1013+
if (deadlock_state == DS_HARD_DEADLOCK)
10171014
{
1018-
/* No deadlock, so keep waiting */
1019-
goto check_done;
1020-
}
1021-
1022-
/*
1023-
* Oops. We have a deadlock.
1024-
*
1025-
* Get this process out of wait state. (Note: we could do this more
1026-
* efficiently by relying on lockAwaited, but use this coding to preserve
1027-
* the flexibility to kill some other transaction than the one detecting
1028-
* the deadlock.)
1029-
*
1030-
* RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
1031-
* ProcSleep will report an error after we return from the signal handler.
1032-
*/
1033-
Assert(MyProc->waitLock != NULL);
1034-
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
1015+
/*
1016+
* Oops. We have a deadlock.
1017+
*
1018+
* Get this process out of wait state. (Note: we could do this more
1019+
* efficiently by relying on lockAwaited, but use this coding to preserve
1020+
* the flexibility to kill some other transaction than the one detecting
1021+
* the deadlock.)
1022+
*
1023+
* RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
1024+
* ProcSleep will report an error after we return from the signal handler.
1025+
*/
1026+
Assert(MyProc->waitLock != NULL);
1027+
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
10351028

1036-
/*
1037-
* Unlock my semaphore so that the interrupted ProcSleep() call can
1038-
* finish.
1039-
*/
1040-
PGSemaphoreUnlock(&MyProc->sem);
1029+
/*
1030+
* Unlock my semaphore so that the interrupted ProcSleep() call can
1031+
* finish.
1032+
*/
1033+
PGSemaphoreUnlock(&MyProc->sem);
10411034

1042-
/*
1043-
* We're done here. Transaction abort caused by the error that ProcSleep
1044-
* will raise will cause any other locks we hold to be released, thus
1045-
* allowing other processes to wake up; we don't need to do that here.
1046-
* NOTE: an exception is that releasing locks we hold doesn't consider the
1047-
* possibility of waiters that were blocked behind us on the lock we just
1048-
* failed to get, and might now be wakable because we're not in front of
1049-
* them anymore. However, RemoveFromWaitQueue took care of waking up any
1050-
* such processes.
1051-
*/
1035+
/*
1036+
* We're done here. Transaction abort caused by the error that ProcSleep
1037+
* will raise will cause any other locks we hold to be released, thus
1038+
* allowing other processes to wake up; we don't need to do that here.
1039+
* NOTE: an exception is that releasing locks we hold doesn't consider the
1040+
* possibility of waiters that were blocked behind us on the lock we just
1041+
* failed to get, and might now be wakable because we're not in front of
1042+
* them anymore. However, RemoveFromWaitQueue took care of waking up any
1043+
* such processes.
1044+
*/
1045+
}
10521046

10531047
/*
10541048
* Release locks acquired at head of routine. Order is not critical, so
10551049
* do it back-to-front to avoid waking another CheckDeadLock instance
10561050
* before it can get all the locks.
10571051
*/
1058-
check_done:
10591052
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
10601053
LWLockRelease(FirstLockMgrLock + i);
1054+
1055+
/*
1056+
* Issue any log messages requested.
1057+
*
1058+
* Deadlock ERROR messages are issued as part of transaction abort, so
1059+
* these messages should not raise error states intentionally.
1060+
*/
1061+
if (log_lock_waits)
1062+
{
1063+
switch (deadlock_state)
1064+
{
1065+
case DS_SOFT_DEADLOCK:
1066+
ereport(LOG,
1067+
(errmsg("deadlock avoided by rearranging lock order")));
1068+
break;
1069+
case DS_DEADLOCK_NOT_FOUND:
1070+
ereport(LOG,
1071+
(errmsg("statement waiting for lock for at least %d ms",
1072+
DeadlockTimeout)));
1073+
break;
1074+
case DS_HARD_DEADLOCK:
1075+
break; /* ERROR message handled during abort */
1076+
default:
1077+
break;
1078+
}
1079+
}
10611080
}
10621081

10631082

src/backend/utils/misc/guc.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Written by Peter Eisentraut <peter_e@gmx.net>.
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.377 2007/02/23 21:36:18 momjian Exp $
13+
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.378 2007/03/03 18:46:40 momjian Exp $
1414
*
1515
*--------------------------------------------------------------------
1616
*/
@@ -811,6 +811,14 @@ static struct config_bool ConfigureNamesBool[] =
811811
},
812812
#endif
813813

814+
{
815+
{"log_lock_waits", PGC_SIGHUP, LOGGING_WHAT,
816+
gettext_noop("Logs long lock wait events."),
817+
NULL
818+
},
819+
&log_lock_waits,
820+
false, NULL, NULL
821+
},
814822
{
815823
{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
816824
gettext_noop("Logs the host name in the connection logs."),

src/backend/utils/misc/postgresql.conf.sample

+1-1
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@
340340
# e.g. '<%u%%%d> '
341341
#log_statement = 'none' # none, ddl, mod, all
342342
#log_hostname = off
343-
343+
#log_lock_waits = off # Log lock waits longer than deadlock_timeout
344344
#log_temp_files = -1 # Log temporary files equal or larger
345345
# than the specified number of kilobytes.
346346
# -1 disables; 0 logs all temp files

src/include/storage/lock.h

+9-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.103 2007/01/05 22:19:58 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.104 2007/03/03 18:46:40 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -394,6 +394,13 @@ typedef enum
394394
LOCKACQUIRE_ALREADY_HELD /* incremented count for lock already held */
395395
} LockAcquireResult;
396396

397+
/* Deadlock states identified by DeadlockCheck() */
398+
typedef enum
399+
{
400+
DS_DEADLOCK_NOT_FOUND, /* no deadlock found within database server */
401+
DS_SOFT_DEADLOCK, /* deadlock, but lock queues rearrangeable */
402+
DS_HARD_DEADLOCK /* deadlock, no way out but ERROR */
403+
} DeadlockState;
397404

398405
/*
399406
* The lockmgr's shared hash tables are partitioned to reduce contention.
@@ -442,7 +449,7 @@ extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
442449
extern void lock_twophase_postabort(TransactionId xid, uint16 info,
443450
void *recdata, uint32 len);
444451

445-
extern bool DeadLockCheck(PGPROC *proc);
452+
extern DeadlockState DeadLockCheck(PGPROC *proc);
446453
extern void DeadLockReport(void);
447454
extern void RememberSimpleDeadLock(PGPROC *proc1,
448455
LOCKMODE lockmode,

src/include/storage/proc.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.94 2007/02/15 23:23:23 alvherre Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.95 2007/03/03 18:46:40 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -127,6 +127,7 @@ typedef struct PROC_HDR
127127
/* configurable options */
128128
extern int DeadlockTimeout;
129129
extern int StatementTimeout;
130+
extern bool log_lock_waits;
130131

131132
extern volatile bool cancel_from_timeout;
132133

0 commit comments

Comments
 (0)