Skip to content

Commit 6d376c3

Browse files
committed
Add GUC option to log lock acquisition failures.
This commit introduces a new GUC, log_lock_failure, which controls whether a detailed log message is produced when a lock acquisition fails. Currently, it only supports logging lock failures caused by SELECT ... NOWAIT. The log message includes information about all processes holding or waiting for the lock that couldn't be acquired, helping users analyze and diagnose the causes of lock failures. Currently, this option does not log failures from SELECT ... SKIP LOCKED, as that could generate excessive log messages if many locks are skipped, causing unnecessary noise. This mechanism can be extended in the future to support for logging lock failures from other commands, such as LOCK TABLE ... NOWAIT. Author: Yuki Seino <seinoyu@oss.nttdata.com> Co-authored-by: Fujii Masao <masao.fujii@gmail.com> Reviewed-by: Jelte Fennema-Nio <postgres@jeltef.nl> Discussion: https://postgr.es/m/411280a186cc26ef7034e0f2dfe54131@oss.nttdata.com
1 parent e80171d commit 6d376c3

File tree

11 files changed

+211
-86
lines changed

11 files changed

+211
-86
lines changed

doc/src/sgml/config.sgml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7757,6 +7757,26 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
77577757
</listitem>
77587758
</varlistentry>
77597759

7760+
<varlistentry id="guc-log-lock-failure" xreflabel="log_lock_failure">
7761+
<term><varname>log_lock_failure</varname> (<type>boolean</type>)
7762+
<indexterm>
7763+
<primary><varname>log_lock_failure</varname> configuration parameter</primary>
7764+
</indexterm>
7765+
</term>
7766+
<listitem>
7767+
<para>
7768+
Controls whether a detailed log message is produced
7769+
when a lock acquisition fails. This is useful for analyzing
7770+
the causes of lock failures. Currently, only lock failures
7771+
due to <literal>SELECT NOWAIT</literal> is supported.
7772+
The default is <literal>off</literal>. Only superusers and
7773+
users with the appropriate <literal>SET</literal> privilege
7774+
can change this setting.
7775+
</para>
7776+
</listitem>
7777+
</varlistentry>
7778+
7779+
77607780
<varlistentry id="guc-log-recovery-conflict-waits" xreflabel="log_recovery_conflict_waits">
77617781
<term><varname>log_recovery_conflict_waits</varname> (<type>boolean</type>)
77627782
<indexterm>

src/backend/access/heap/heapam.c

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 in
9898
Relation rel, ItemPointer ctid, XLTW_Oper oper,
9999
int *remaining);
100100
static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
101-
uint16 infomask, Relation rel, int *remaining);
101+
uint16 infomask, Relation rel, int *remaining,
102+
bool logLockFailure);
102103
static void index_delete_sort(TM_IndexDeleteOp *delstate);
103104
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
104105
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
@@ -162,8 +163,8 @@ static const struct
162163
LockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
163164
#define UnlockTupleTuplock(rel, tup, mode) \
164165
UnlockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
165-
#define ConditionalLockTupleTuplock(rel, tup, mode) \
166-
ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock)
166+
#define ConditionalLockTupleTuplock(rel, tup, mode, log) \
167+
ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock, (log))
167168

168169
#ifdef USE_PREFETCH
169170
/*
@@ -4894,7 +4895,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
48944895
case LockWaitSkip:
48954896
if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
48964897
status, infomask, relation,
4897-
NULL))
4898+
NULL, false))
48984899
{
48994900
result = TM_WouldBlock;
49004901
/* recovery code expects to have buffer lock held */
@@ -4905,7 +4906,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
49054906
case LockWaitError:
49064907
if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
49074908
status, infomask, relation,
4908-
NULL))
4909+
NULL, log_lock_failure))
49094910
ereport(ERROR,
49104911
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
49114912
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -4934,7 +4935,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
49344935
XLTW_Lock);
49354936
break;
49364937
case LockWaitSkip:
4937-
if (!ConditionalXactLockTableWait(xwait))
4938+
if (!ConditionalXactLockTableWait(xwait, false))
49384939
{
49394940
result = TM_WouldBlock;
49404941
/* recovery code expects to have buffer lock held */
@@ -4943,7 +4944,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
49434944
}
49444945
break;
49454946
case LockWaitError:
4946-
if (!ConditionalXactLockTableWait(xwait))
4947+
if (!ConditionalXactLockTableWait(xwait, log_lock_failure))
49474948
ereport(ERROR,
49484949
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
49494950
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -5203,12 +5204,12 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
52035204
break;
52045205

52055206
case LockWaitSkip:
5206-
if (!ConditionalLockTupleTuplock(relation, tid, mode))
5207+
if (!ConditionalLockTupleTuplock(relation, tid, mode, false))
52075208
return false;
52085209
break;
52095210

52105211
case LockWaitError:
5211-
if (!ConditionalLockTupleTuplock(relation, tid, mode))
5212+
if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure))
52125213
ereport(ERROR,
52135214
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
52145215
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -7581,7 +7582,8 @@ DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
75817582
* fail if lock is unavailable. 'rel', 'ctid' and 'oper' are used to set up
75827583
* context information for error messages. 'remaining', if not NULL, receives
75837584
* the number of members that are still running, including any (non-aborted)
7584-
* subtransactions of our own transaction.
7585+
* subtransactions of our own transaction. 'logLockFailure' indicates whether
7586+
* to log details when a lock acquisition fails with 'nowait' enabled.
75857587
*
75867588
* We do this by sleeping on each member using XactLockTableWait. Any
75877589
* members that belong to the current backend are *not* waited for, however;
@@ -7602,7 +7604,7 @@ static bool
76027604
Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
76037605
uint16 infomask, bool nowait,
76047606
Relation rel, ItemPointer ctid, XLTW_Oper oper,
7605-
int *remaining)
7607+
int *remaining, bool logLockFailure)
76067608
{
76077609
bool result = true;
76087610
MultiXactMember *members;
@@ -7649,7 +7651,7 @@ Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status,
76497651
*/
76507652
if (nowait)
76517653
{
7652-
result = ConditionalXactLockTableWait(memxid);
7654+
result = ConditionalXactLockTableWait(memxid, logLockFailure);
76537655
if (!result)
76547656
break;
76557657
}
@@ -7682,7 +7684,7 @@ MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask,
76827684
int *remaining)
76837685
{
76847686
(void) Do_MultiXactIdWait(multi, status, infomask, false,
7685-
rel, ctid, oper, remaining);
7687+
rel, ctid, oper, remaining, false);
76867688
}
76877689

76887690
/*
@@ -7700,10 +7702,11 @@ MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask,
77007702
*/
77017703
static bool
77027704
ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status,
7703-
uint16 infomask, Relation rel, int *remaining)
7705+
uint16 infomask, Relation rel, int *remaining,
7706+
bool logLockFailure)
77047707
{
77057708
return Do_MultiXactIdWait(multi, status, infomask, true,
7706-
rel, NULL, XLTW_None, remaining);
7709+
rel, NULL, XLTW_None, remaining, logLockFailure);
77077710
}
77087711

77097712
/*

src/backend/access/heap/heapam_handler.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -455,12 +455,12 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot,
455455
XLTW_FetchUpdated);
456456
break;
457457
case LockWaitSkip:
458-
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
458+
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false))
459459
/* skip instead of waiting */
460460
return TM_WouldBlock;
461461
break;
462462
case LockWaitError:
463-
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax))
463+
if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
464464
ereport(ERROR,
465465
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
466466
errmsg("could not obtain lock on row in relation \"%s\"",

src/backend/storage/lmgr/lmgr.c

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ LockRelationOid(Oid relid, LOCKMODE lockmode)
112112

113113
SetLocktagRelationOid(&tag, relid);
114114

115-
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock);
115+
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock,
116+
false);
116117

117118
/*
118119
* Now that we have the lock, check for invalidation messages, so that we
@@ -155,7 +156,8 @@ ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode)
155156

156157
SetLocktagRelationOid(&tag, relid);
157158

158-
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock);
159+
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock,
160+
false);
159161

160162
if (res == LOCKACQUIRE_NOT_AVAIL)
161163
return false;
@@ -188,7 +190,8 @@ LockRelationId(LockRelId *relid, LOCKMODE lockmode)
188190

189191
SET_LOCKTAG_RELATION(tag, relid->dbId, relid->relId);
190192

191-
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock);
193+
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock,
194+
false);
192195

193196
/*
194197
* Now that we have the lock, check for invalidation messages; see notes
@@ -250,7 +253,8 @@ LockRelation(Relation relation, LOCKMODE lockmode)
250253
relation->rd_lockInfo.lockRelId.dbId,
251254
relation->rd_lockInfo.lockRelId.relId);
252255

253-
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock);
256+
res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock,
257+
false);
254258

255259
/*
256260
* Now that we have the lock, check for invalidation messages; see notes
@@ -281,7 +285,8 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
281285
relation->rd_lockInfo.lockRelId.dbId,
282286
relation->rd_lockInfo.lockRelId.relId);
283287

284-
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock);
288+
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock,
289+
false);
285290

286291
if (res == LOCKACQUIRE_NOT_AVAIL)
287292
return false;
@@ -574,7 +579,8 @@ LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
574579
* Returns true iff the lock was acquired.
575580
*/
576581
bool
577-
ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
582+
ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode,
583+
bool logLockFailure)
578584
{
579585
LOCKTAG tag;
580586

@@ -584,7 +590,8 @@ ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode)
584590
ItemPointerGetBlockNumber(tid),
585591
ItemPointerGetOffsetNumber(tid));
586592

587-
return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL);
593+
return (LockAcquireExtended(&tag, lockmode, false, true, true, NULL,
594+
logLockFailure) != LOCKACQUIRE_NOT_AVAIL);
588595
}
589596

590597
/*
@@ -726,7 +733,7 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
726733
* Returns true if the lock was acquired.
727734
*/
728735
bool
729-
ConditionalXactLockTableWait(TransactionId xid)
736+
ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
730737
{
731738
LOCKTAG tag;
732739
bool first = true;
@@ -738,7 +745,9 @@ ConditionalXactLockTableWait(TransactionId xid)
738745

739746
SET_LOCKTAG_TRANSACTION(tag, xid);
740747

741-
if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL)
748+
if (LockAcquireExtended(&tag, ShareLock, false, true, true, NULL,
749+
logLockFailure)
750+
== LOCKACQUIRE_NOT_AVAIL)
742751
return false;
743752

744753
LockRelease(&tag, ShareLock, false);
@@ -1027,7 +1036,8 @@ ConditionalLockDatabaseObject(Oid classid, Oid objid, uint16 objsubid,
10271036
objid,
10281037
objsubid);
10291038

1030-
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock);
1039+
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock,
1040+
false);
10311041

10321042
if (res == LOCKACQUIRE_NOT_AVAIL)
10331043
return false;
@@ -1106,7 +1116,8 @@ ConditionalLockSharedObject(Oid classid, Oid objid, uint16 objsubid,
11061116
objid,
11071117
objsubid);
11081118

1109-
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock);
1119+
res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock,
1120+
false);
11101121

11111122
if (res == LOCKACQUIRE_NOT_AVAIL)
11121123
return false;

src/backend/storage/lmgr/lock.c

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "access/xlogutils.h"
4040
#include "miscadmin.h"
4141
#include "pg_trace.h"
42+
#include "storage/lmgr.h"
4243
#include "storage/proc.h"
4344
#include "storage/procarray.h"
4445
#include "storage/spin.h"
@@ -48,8 +49,9 @@
4849
#include "utils/resowner.h"
4950

5051

51-
/* This configuration variable is used to set the lock table size */
52-
int max_locks_per_xact; /* set by guc.c */
52+
/* GUC variables */
53+
int max_locks_per_xact; /* used to set the lock table size */
54+
bool log_lock_failure = false;
5355

5456
#define NLOCKENTS() \
5557
mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
@@ -806,7 +808,7 @@ LockAcquire(const LOCKTAG *locktag,
806808
bool dontWait)
807809
{
808810
return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait,
809-
true, NULL);
811+
true, NULL, false);
810812
}
811813

812814
/*
@@ -822,14 +824,18 @@ LockAcquire(const LOCKTAG *locktag,
822824
*
823825
* If locallockp isn't NULL, *locallockp receives a pointer to the LOCALLOCK
824826
* table entry if a lock is successfully acquired, or NULL if not.
827+
*
828+
* logLockFailure indicates whether to log details when a lock acquisition
829+
* fails with dontWait = true.
825830
*/
826831
LockAcquireResult
827832
LockAcquireExtended(const LOCKTAG *locktag,
828833
LOCKMODE lockmode,
829834
bool sessionLock,
830835
bool dontWait,
831836
bool reportMemoryError,
832-
LOCALLOCK **locallockp)
837+
LOCALLOCK **locallockp,
838+
bool logLockFailure)
833839
{
834840
LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
835841
LockMethod lockMethodTable;
@@ -1145,6 +1151,47 @@ LockAcquireExtended(const LOCKTAG *locktag,
11451151

11461152
if (dontWait)
11471153
{
1154+
/*
1155+
* Log lock holders and waiters as a detail log message if
1156+
* logLockFailure = true and lock acquisition fails with dontWait
1157+
* = true
1158+
*/
1159+
if (logLockFailure)
1160+
{
1161+
StringInfoData buf,
1162+
lock_waiters_sbuf,
1163+
lock_holders_sbuf;
1164+
const char *modename;
1165+
int lockHoldersNum = 0;
1166+
1167+
initStringInfo(&buf);
1168+
initStringInfo(&lock_waiters_sbuf);
1169+
initStringInfo(&lock_holders_sbuf);
1170+
1171+
DescribeLockTag(&buf, &locallock->tag.lock);
1172+
modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
1173+
lockmode);
1174+
1175+
/* Gather a list of all lock holders and waiters */
1176+
LWLockAcquire(partitionLock, LW_SHARED);
1177+
GetLockHoldersAndWaiters(locallock, &lock_holders_sbuf,
1178+
&lock_waiters_sbuf, &lockHoldersNum);
1179+
LWLockRelease(partitionLock);
1180+
1181+
ereport(LOG,
1182+
(errmsg("process %d could not obtain %s on %s",
1183+
MyProcPid, modename, buf.data),
1184+
errdetail_log_plural(
1185+
"Process holding the lock: %s, Wait queue: %s.",
1186+
"Processes holding the lock: %s, Wait queue: %s.",
1187+
lockHoldersNum,
1188+
lock_holders_sbuf.data,
1189+
lock_waiters_sbuf.data)));
1190+
1191+
pfree(buf.data);
1192+
pfree(lock_holders_sbuf.data);
1193+
pfree(lock_waiters_sbuf.data);
1194+
}
11481195
if (locallockp)
11491196
*locallockp = NULL;
11501197
return LOCKACQUIRE_NOT_AVAIL;

0 commit comments

Comments
 (0)