Add GUC log_lock_waits to log long wait times.

bmomjian · bmomjian · commit e52c4a6e26f2 · 2007-03-03T18:46:40.000Z
Simon Riggs
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.113 2007/03/02 23:37:22 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.114 2007/03/03 18:46:40 momjian Exp $ -->
 
 <chapter Id="runtime-config">
   <title>Server Configuration</title>
@@ -2946,6 +2946,21 @@ SELECT * FROM parent WHERE key = 2400;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-lock-waits" xreflabel="log_lock_waits">
+      <term><varname>log_lock_waits</varname> (<type>boolean</type>)</term>
+      <indexterm>
+       <primary><varname>log_lock_waits</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Controls whether a log message is produced when a statement waits
+        longer than <xref linkend="guc-deadlock-timeout"> to acquire a
+        lock.  This is useful in determining if lock waits are causing
+        poor performance.  The default is <literal>off</>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-log-temp-files" xreflabel="log_temp_files">
       <term><varname>log_temp_files</varname> (<type>integer</type>)</term>
       <indexterm>
@@ -3980,17 +3995,18 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
         This is the amount of time, in milliseconds, to wait on a lock
         before checking to see if there is a deadlock condition. The
         check for deadlock is relatively slow, so the server doesn't run
-        it every time it waits for a lock. We (optimistically?) assume
+        it every time it waits for a lock. We optimistically assume
         that deadlocks are not common in production applications and
         just wait on the lock for a while before starting the check for a
         deadlock. Increasing this value reduces the amount of time
         wasted in needless deadlock checks, but slows down reporting of
         real deadlock errors. The default is one second (<literal>1s</>),
         which is probably about the smallest value you would want in
-        practice. On a heavily loaded server you might want to raise it.
+        practice.  Set <xref linkend="guc-log-lock-waits"> to log deadlock
+        checks.  On a heavily loaded server you might want to raise it.
         Ideally the setting should exceed your typical transaction time,
-        so as to improve the odds that a lock will be released before
-        the waiter decides to check for deadlock.
+        so as to improve the odds that a lock will be released before the
+        waiter decides to check for deadlock.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/backend/storage/lmgr/deadlock.c b/src/backend/storage/lmgr/deadlock.c
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.44 2007/01/05 22:19:38 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.45 2007/03/03 18:46:40 momjian Exp $
  *
  *	Interface:
  *
@@ -187,13 +187,14 @@ InitDeadLockChecking(void)
  * deadlock.  If resolution is impossible, return TRUE --- the caller
  * is then expected to abort the given proc's transaction.
  *
- * Caller must already have locked all partitions of the lock tables.
+ * Caller must already have locked all partitions of the lock tables,
+ * so standard error logging/reporting code is handled by caller.
  *
  * On failure, deadlock details are recorded in deadlockDetails[] for
  * subsequent printing by DeadLockReport().  That activity is separate
  * because we don't want to do it while holding all those LWLocks.
  */
-bool
+DeadlockState
 DeadLockCheck(PGPROC *proc)
 {
 	int			i,
@@ -204,6 +205,11 @@ DeadLockCheck(PGPROC *proc)
 	nPossibleConstraints = 0;
 	nWaitOrders = 0;
 
+#ifdef LOCK_DEBUG
+	if (Debug_deadlocks)
+		DumpAllLocks();
+#endif
+
 	/* Search for deadlocks and possible fixes */
 	if (DeadLockCheckRecurse(proc))
 	{
@@ -217,7 +223,7 @@ DeadLockCheck(PGPROC *proc)
 		if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
 			elog(FATAL, "deadlock seems to have disappeared");
 
-		return true;			/* cannot find a non-deadlocked state */
+		return DS_HARD_DEADLOCK;	/* cannot find a non-deadlocked state */
 	}
 
 	/* Apply any needed rearrangements of wait queues */
@@ -249,7 +255,11 @@ DeadLockCheck(PGPROC *proc)
 		/* See if any waiters for the lock can be woken up now */
 		ProcLockWakeup(GetLocksMethodTable(lock), lock);
 	}
-	return false;
+
+	if (nWaitOrders > 0)
+		return DS_SOFT_DEADLOCK;
+	else
+		return DS_DEADLOCK_NOT_FOUND;
 }
 
 /*
@@ -896,7 +906,7 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *lock)
 }
 
 /*
- * Report a detected deadlock, with available details.
+ * Report a detected DS_HARD_DEADLOCK, with available details.
  */
 void
 DeadLockReport(void)
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.184 2007/02/15 23:23:23 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.185 2007/03/03 18:46:40 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -48,6 +48,7 @@
 /* GUC variables */
 int			DeadlockTimeout = 1000;
 int			StatementTimeout = 0;
+bool		log_lock_waits = false;
 
 /* Pointer to this process's PGPROC struct, if any */
 PGPROC	   *MyProc = NULL;
@@ -979,6 +980,7 @@ static void
 CheckDeadLock(void)
 {
 	int			i;
+	DeadlockState deadlock_state = DS_DEADLOCK_NOT_FOUND;
 
 	/*
 	 * Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1004,60 +1006,77 @@ CheckDeadLock(void)
 	 * This is quicker than checking our semaphore's state, since no kernel
 	 * call is needed, and it is safe because we hold the lock partition lock.
 	 */
-	if (MyProc->links.prev == INVALID_OFFSET ||
-		MyProc->links.next == INVALID_OFFSET)
-		goto check_done;
-
-#ifdef LOCK_DEBUG
-	if (Debug_deadlocks)
-		DumpAllLocks();
-#endif
-
-	if (!DeadLockCheck(MyProc))
+	if (MyProc->links.prev != INVALID_OFFSET &&
+		MyProc->links.next != INVALID_OFFSET)
+		deadlock_state = DeadLockCheck(MyProc);
+	
+	if (deadlock_state == DS_HARD_DEADLOCK)
 	{
-		/* No deadlock, so keep waiting */
-		goto check_done;
-	}
-
-	/*
-	 * Oops.  We have a deadlock.
-	 *
-	 * Get this process out of wait state.	(Note: we could do this more
-	 * efficiently by relying on lockAwaited, but use this coding to preserve
-	 * the flexibility to kill some other transaction than the one detecting
-	 * the deadlock.)
-	 *
-	 * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
-	 * ProcSleep will report an error after we return from the signal handler.
-	 */
-	Assert(MyProc->waitLock != NULL);
-	RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
+		/*
+		 * Oops.  We have a deadlock.
+		 *
+		 * Get this process out of wait state.	(Note: we could do this more
+		 * efficiently by relying on lockAwaited, but use this coding to preserve
+		 * the flexibility to kill some other transaction than the one detecting
+		 * the deadlock.)
+		 *
+		 * RemoveFromWaitQueue sets MyProc->waitStatus to STATUS_ERROR, so
+		 * ProcSleep will report an error after we return from the signal handler.
+		 */
+		Assert(MyProc->waitLock != NULL);
+		RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
 
-	/*
-	 * Unlock my semaphore so that the interrupted ProcSleep() call can
-	 * finish.
-	 */
-	PGSemaphoreUnlock(&MyProc->sem);
+		/*
+		 * Unlock my semaphore so that the interrupted ProcSleep() call can
+		 * finish.
+		 */
+		PGSemaphoreUnlock(&MyProc->sem);
 
-	/*
-	 * We're done here.  Transaction abort caused by the error that ProcSleep
-	 * will raise will cause any other locks we hold to be released, thus
-	 * allowing other processes to wake up; we don't need to do that here.
-	 * NOTE: an exception is that releasing locks we hold doesn't consider the
-	 * possibility of waiters that were blocked behind us on the lock we just
-	 * failed to get, and might now be wakable because we're not in front of
-	 * them anymore.  However, RemoveFromWaitQueue took care of waking up any
-	 * such processes.
-	 */
+		/*
+		 * We're done here.  Transaction abort caused by the error that ProcSleep
+		 * will raise will cause any other locks we hold to be released, thus
+		 * allowing other processes to wake up; we don't need to do that here.
+		 * NOTE: an exception is that releasing locks we hold doesn't consider the
+		 * possibility of waiters that were blocked behind us on the lock we just
+		 * failed to get, and might now be wakable because we're not in front of
+		 * them anymore.  However, RemoveFromWaitQueue took care of waking up any
+		 * such processes.
+		 */
+	}
 
 	/*
 	 * Release locks acquired at head of routine.  Order is not critical, so
 	 * do it back-to-front to avoid waking another CheckDeadLock instance
 	 * before it can get all the locks.
 	 */
-check_done:
 	for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
 		LWLockRelease(FirstLockMgrLock + i);
+
+	/*
+	 * Issue any log messages requested.
+	 *
+	 * Deadlock ERROR messages are issued as part of transaction abort, so 
+	 * these messages should not raise error states intentionally.
+	 */
+	if (log_lock_waits)
+	{
+		switch (deadlock_state)
+		{
+			case DS_SOFT_DEADLOCK:
+				ereport(LOG,
+					(errmsg("deadlock avoided by rearranging lock order")));
+				break;
+			case DS_DEADLOCK_NOT_FOUND:
+				ereport(LOG,
+					(errmsg("statement waiting for lock for at least %d ms",
+								DeadlockTimeout)));
+				break;
+			case DS_HARD_DEADLOCK:
+				break;	/* ERROR message handled during abort */
+			default:
+				break;
+		}
+	}
 }
 
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.377 2007/02/23 21:36:18 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.378 2007/03/03 18:46:40 momjian Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -811,6 +811,14 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 #endif
 
+	{
+		{"log_lock_waits", PGC_SIGHUP, LOGGING_WHAT,
+			gettext_noop("Logs long lock wait events."),
+			NULL
+		},
+		&log_lock_waits,
+		false, NULL, NULL
+	},
 	{
 		{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs the host name in the connection logs."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
@@ -340,7 +340,7 @@
 					# e.g. '<%u%%%d> '
 #log_statement = 'none'			# none, ddl, mod, all
 #log_hostname = off
-
+#log_lock_waits = off			# Log lock waits longer than deadlock_timeout
 #log_temp_files = -1			# Log temporary files equal or larger
 					# than the specified number of kilobytes.
 					# -1 disables;  0 logs all temp files
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.103 2007/01/05 22:19:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.104 2007/03/03 18:46:40 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -394,6 +394,13 @@ typedef enum
 	LOCKACQUIRE_ALREADY_HELD	/* incremented count for lock already held */
 } LockAcquireResult;
 
+/* Deadlock states identified by DeadlockCheck() */
+typedef enum
+{
+	DS_DEADLOCK_NOT_FOUND,		/* no deadlock found within database server */
+	DS_SOFT_DEADLOCK,			/* deadlock, but lock queues rearrangeable */
+	DS_HARD_DEADLOCK			/* deadlock, no way out but ERROR */
+} DeadlockState;
 
 /*
  * The lockmgr's shared hash tables are partitioned to reduce contention.
@@ -442,7 +449,7 @@ extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
 extern void lock_twophase_postabort(TransactionId xid, uint16 info,
 						void *recdata, uint32 len);
 
-extern bool DeadLockCheck(PGPROC *proc);
+extern DeadlockState DeadLockCheck(PGPROC *proc);
 extern void DeadLockReport(void);
 extern void RememberSimpleDeadLock(PGPROC *proc1,
 					   LOCKMODE lockmode,
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.94 2007/02/15 23:23:23 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.95 2007/03/03 18:46:40 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -127,6 +127,7 @@ typedef struct PROC_HDR
 /* configurable options */
 extern int	DeadlockTimeout;
 extern int	StatementTimeout;
+extern bool	log_lock_waits;
 
 extern volatile bool cancel_from_timeout;
 

Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`*`
`13`	`13`	`*`
`14`	`14`	`* IDENTIFICATION`
`15`		`- * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.44 2007/01/05 22:19:38 momjian Exp $`
	`15`	`+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.45 2007/03/03 18:46:40 momjian Exp $`
`16`	`16`	`*`
`17`	`17`	`* Interface:`
`18`	`18`	`*`
`@@ -187,13 +187,14 @@ InitDeadLockChecking(void)`
`187`	`187`	`* deadlock. If resolution is impossible, return TRUE --- the caller`
`188`	`188`	`* is then expected to abort the given proc's transaction.`
`189`	`189`	`*`
`190`		`- * Caller must already have locked all partitions of the lock tables.`
	`190`	`+ * Caller must already have locked all partitions of the lock tables,`
	`191`	`+ * so standard error logging/reporting code is handled by caller.`
`191`	`192`	`*`
`192`	`193`	`* On failure, deadlock details are recorded in deadlockDetails[] for`
`193`	`194`	`* subsequent printing by DeadLockReport(). That activity is separate`
`194`	`195`	`* because we don't want to do it while holding all those LWLocks.`
`195`	`196`	`*/`
`196`		`-bool`
	`197`	`+DeadlockState`
`197`	`198`	`DeadLockCheck(PGPROC *proc)`
`198`	`199`	`{`
`199`	`200`	`int i,`
`@@ -204,6 +205,11 @@ DeadLockCheck(PGPROC *proc)`
`204`	`205`	`nPossibleConstraints = 0;`
`205`	`206`	`nWaitOrders = 0;`
`206`	`207`
	`208`	`+#ifdef LOCK_DEBUG`
	`209`	`+ if (Debug_deadlocks)`
	`210`	`+ DumpAllLocks();`
	`211`	`+#endif`
	`212`	`+`
`207`	`213`	`/* Search for deadlocks and possible fixes */`
`208`	`214`	`if (DeadLockCheckRecurse(proc))`
`209`	`215`	`{`
`@@ -217,7 +223,7 @@ DeadLockCheck(PGPROC *proc)`
`217`	`223`	`if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))`
`218`	`224`	`elog(FATAL, "deadlock seems to have disappeared");`
`219`	`225`
`220`		`- return true; /* cannot find a non-deadlocked state */`
	`226`	`+ return DS_HARD_DEADLOCK; /* cannot find a non-deadlocked state */`
`221`	`227`	`}`
`222`	`228`
`223`	`229`	`/* Apply any needed rearrangements of wait queues */`
`@@ -249,7 +255,11 @@ DeadLockCheck(PGPROC *proc)`
`249`	`255`	`/* See if any waiters for the lock can be woken up now */`
`250`	`256`	`ProcLockWakeup(GetLocksMethodTable(lock), lock);`
`251`	`257`	`}`
`252`		`- return false;`
	`258`	`+`
	`259`	`+ if (nWaitOrders > 0)`
	`260`	`+ return DS_SOFT_DEADLOCK;`
	`261`	`+ else`
	`262`	`+ return DS_DEADLOCK_NOT_FOUND;`
`253`	`263`	`}`
`254`	`264`
`255`	`265`	`/*`
`@@ -896,7 +906,7 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *lock)`
`896`	`906`	`}`
`897`	`907`
`898`	`908`	`/*`
`899`		`- * Report a detected deadlock, with available details.`
	`909`	`+ * Report a detected DS_HARD_DEADLOCK, with available details.`
`900`	`910`	`*/`
`901`	`911`	`void`
`902`	`912`	`DeadLockReport(void)`
Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group`
`8`	`8`	`* Portions Copyright (c) 1994, Regents of the University of California`
`9`	`9`	`*`
`10`		`- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.94 2007/02/15 23:23:23 alvherre Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.95 2007/03/03 18:46:40 momjian Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -127,6 +127,7 @@ typedef struct PROC_HDR`
`127`	`127`	`/* configurable options */`
`128`	`128`	`extern int DeadlockTimeout;`
`129`	`129`	`extern int StatementTimeout;`
	`130`	`+extern bool log_lock_waits;`
`130`	`131`
`131`	`132`	`extern volatile bool cancel_from_timeout;`
`132`	`133`