Commit 0e5680f

Grab heavyweight tuple lock only before sleeping
We were trying to acquire the lock even when we were subsequently not going to sleep waiting for some other transaction, which opens us up unnecessarily to deadlocks. In particular, this is troublesome if an update tries to lock an updated version of a tuple and finds itself doing EvalPlanQual update chain walking; more than two sessions doing this concurrently will find themselves sleeping on each other, because the heavyweight tuple lock acquisition in heap_lock_tuple, called from EvalPlanQualFetch, races with the same tuple lock being acquired in heap_update -- one of these sessions sleeps on the other one to finish while holding the tuple lock, and the other one sleeps on the tuple lock.

Per trouble report from Andrew Sackville-West in
http://www.postgresql.org/message-id/20140731233051.GN17765@andrew-ThinkPad-X230

His scenario can be simplified down to a relatively simple isolationtester spec file, which I don't include in this commit; the reason is that the current isolationtester is not able to deal with more than one blocked session concurrently, and it blocks instead of raising the expected deadlock. In the future, if we improve isolationtester, it would be good to include the spec file in the isolation schedule. I posted it in
http://www.postgresql.org/message-id/20141212205254.GC1768@alvh.no-ip.org

Hat tip to Mark Kirkwood, who helped diagnose the trouble.
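The spec file itself is only linked above, not included. Purely as an illustration of the shape of the scenario, here is a hypothetical isolationtester-style sketch: three sessions all update the same row, so that two of them end up doing EvalPlanQual chain walking at once. Table, session, and step names are invented; this is not the spec file posted to the list, and, as the message notes, the isolationtester of the time could not drive more than one blocked session anyway.

```
# Illustrative sketch only -- not the spec file from the mailing-list post.
setup
{
  CREATE TABLE t (key int PRIMARY KEY, value int);
  INSERT INTO t VALUES (1, 0);
}

teardown
{
  DROP TABLE t;
}

session "s1"
step "s1b" { BEGIN; }
step "s1u" { UPDATE t SET value = value + 1 WHERE key = 1; }
step "s1c" { COMMIT; }

session "s2"
step "s2b" { BEGIN; }
step "s2u" { UPDATE t SET value = value + 1 WHERE key = 1; }
step "s2c" { COMMIT; }

session "s3"
step "s3b" { BEGIN; }
step "s3u" { UPDATE t SET value = value + 1 WHERE key = 1; }
step "s3c" { COMMIT; }

# s2 and s3 both block behind s1; once s1 commits, both walk the update
# chain and race to lock the new tuple version.
permutation "s1b" "s2b" "s3b" "s1u" "s2u" "s3u" "s1c" "s2c" "s3c"
```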
1 parent: 8d9cb0b

1 file changed (+172, -111)

src/backend/access/heap/heapam.c

@@ -93,6 +93,9 @@ static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
 				bool *satisfies_hot, bool *satisfies_key,
 				bool *satisfies_id,
 				HeapTuple oldtup, HeapTuple newtup);
+static bool heap_acquire_tuplock(Relation relation, ItemPointer tid,
+				LockTupleMode mode, LockWaitPolicy wait_policy,
+				bool *have_tuple_lock);
 static void compute_new_xmax_infomask(TransactionId xmax, uint16 old_infomask,
 				uint16 old_infomask2, TransactionId add_to_xmax,
 				LockTupleMode mode, bool is_update,
@@ -105,6 +108,8 @@ static void GetMultiXactIdHintBits(MultiXactId multi, uint16 *new_infomask,
 				uint16 *new_infomask2);
 static TransactionId MultiXactIdGetUpdateXid(TransactionId xmax,
 				uint16 t_infomask);
+static bool DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
+				LockTupleMode lockmode);
 static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask,
 				Relation rel, ItemPointer ctid, XLTW_Oper oper,
 				int *remaining);
@@ -2700,11 +2705,8 @@ heap_delete(Relation relation, ItemPointer tid,
 	 * this arranges that we stay at the head of the line while rechecking
 	 * tuple state.
 	 */
-	if (!have_tuple_lock)
-	{
-		LockTupleTuplock(relation, &(tp.t_self), LockTupleExclusive);
-		have_tuple_lock = true;
-	}
+	heap_acquire_tuplock(relation, &(tp.t_self), LockTupleExclusive,
+						 LockWaitBlock, &have_tuple_lock);
 
 	/*
 	 * Sleep until concurrent transaction ends. Note that we don't care
@@ -3223,21 +3225,6 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 
 	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
-	/*
-	 * Acquire tuple lock to establish our priority for the tuple (see
-	 * heap_lock_tuple). LockTuple will release us when we are
-	 * next-in-line for the tuple.
-	 *
-	 * If we are forced to "start over" below, we keep the tuple lock;
-	 * this arranges that we stay at the head of the line while rechecking
-	 * tuple state.
-	 */
-	if (!have_tuple_lock)
-	{
-		LockTupleTuplock(relation, &(oldtup.t_self), *lockmode);
-		have_tuple_lock = true;
-	}
-
 	/*
 	 * Now we have to do something about the existing locker. If it's a
 	 * multi, sleep on it; we might be awakened before it is completely
@@ -3248,12 +3235,30 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	 * before actually going to sleep. If the update doesn't conflict
 	 * with the locks, we just continue without sleeping (but making sure
 	 * it is preserved).
+	 *
+	 * Before sleeping, we need to acquire tuple lock to establish our
+	 * priority for the tuple (see heap_lock_tuple). LockTuple will
+	 * release us when we are next-in-line for the tuple. Note we must not
+	 * acquire the tuple lock until we're sure we're going to sleep;
+	 * otherwise we're open for race conditions with other transactions
+	 * holding the tuple lock which sleep on us.
+	 *
+	 * If we are forced to "start over" below, we keep the tuple lock;
+	 * this arranges that we stay at the head of the line while rechecking
+	 * tuple state.
 	 */
 	if (infomask & HEAP_XMAX_IS_MULTI)
 	{
 		TransactionId update_xact;
 		int			remain;
 
+		/* acquire tuple lock, if necessary */
+		if (DoesMultiXactIdConflict((MultiXactId) xwait, infomask, *lockmode))
+		{
+			heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+								 LockWaitBlock, &have_tuple_lock);
+		}
+
 		/* wait for multixact */
 		MultiXactIdWait((MultiXactId) xwait, mxact_status, infomask,
 						relation, &oldtup.t_data->t_ctid, XLTW_Update,
@@ -3330,7 +3335,12 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	}
 	else
 	{
-		/* wait for regular transaction to end */
+		/*
+		 * Wait for regular transaction to end; but first, acquire
+		 * tuple lock.
+		 */
+		heap_acquire_tuplock(relation, &(oldtup.t_self), *lockmode,
+							 LockWaitBlock, &have_tuple_lock);
 		XactLockTableWait(xwait, relation, &oldtup.t_data->t_ctid,
 						  XLTW_Update);
 		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -4038,7 +4048,6 @@ get_mxact_status_for_lock(LockTupleMode mode, bool is_update)
 	return (MultiXactStatus) retval;
 }
 
-
 /*
  * heap_lock_tuple - lock a tuple in shared or exclusive mode
  *
@@ -4170,42 +4179,6 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 			pfree(members);
 	}
 
-	/*
-	 * Acquire tuple lock to establish our priority for the tuple.
-	 * LockTuple will release us when we are next-in-line for the tuple.
-	 * We must do this even if we are share-locking.
-	 *
-	 * If we are forced to "start over" below, we keep the tuple lock;
-	 * this arranges that we stay at the head of the line while rechecking
-	 * tuple state.
-	 */
-	if (!have_tuple_lock)
-	{
-		switch (wait_policy)
-		{
-			case LockWaitBlock:
-				LockTupleTuplock(relation, tid, mode);
-				break;
-			case LockWaitSkip:
-				if (!ConditionalLockTupleTuplock(relation, tid, mode))
-				{
-					result = HeapTupleWouldBlock;
-					/* recovery code expects to have buffer lock held */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
-					goto failed;
-				}
-				break;
-			case LockWaitError:
-				if (!ConditionalLockTupleTuplock(relation, tid, mode))
-					ereport(ERROR,
-							(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
-							 errmsg("could not obtain lock on row in relation \"%s\"",
-									RelationGetRelationName(relation))));
-				break;
-		}
-		have_tuple_lock = true;
-	}
-
 	/*
 	 * Initially assume that we will have to wait for the locking
 	 * transaction(s) to finish. We check various cases below in which
@@ -4312,66 +4285,26 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	{
 		/*
 		 * If we're requesting NoKeyExclusive, we might also be able to
-		 * avoid sleeping; just ensure that there's no other lock type
-		 * than KeyShare. Note that this is a bit more involved than just
-		 * checking hint bits -- we need to expand the multixact to figure
-		 * out lock modes for each one (unless there was only one such
-		 * locker).
+		 * avoid sleeping; just ensure that there is no conflicting lock
+		 * already acquired.
 		 */
 		if (infomask & HEAP_XMAX_IS_MULTI)
 		{
-			int			nmembers;
-			MultiXactMember *members;
-
-			/*
-			 * We don't need to allow old multixacts here; if that had
-			 * been the case, HeapTupleSatisfiesUpdate would have returned
-			 * MayBeUpdated and we wouldn't be here.
-			 */
-			nmembers =
-				GetMultiXactIdMembers(xwait, &members, false,
-									  HEAP_XMAX_IS_LOCKED_ONLY(infomask));
-
-			if (nmembers <= 0)
+			if (!DoesMultiXactIdConflict((MultiXactId) xwait, infomask,
+										 mode))
 			{
 				/*
-				 * No need to keep the previous xmax here. This is
-				 * unlikely to happen.
+				 * No conflict, but if the xmax changed under us in the
+				 * meantime, start over.
 				 */
-				require_sleep = false;
-			}
-			else
-			{
-				int			i;
-				bool		allowed = true;
-
-				for (i = 0; i < nmembers; i++)
-				{
-					if (members[i].status != MultiXactStatusForKeyShare)
-					{
-						allowed = false;
-						break;
-					}
-				}
-				if (allowed)
-				{
-					/*
-					 * if the xmax changed under us in the meantime, start
-					 * over.
-					 */
-					LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
-					if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
-						!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
-											 xwait))
-					{
-						pfree(members);
-						goto l3;
-					}
-					/* otherwise, we're good */
-					require_sleep = false;
-				}
+				LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+				if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) ||
+					!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+										 xwait))
+					goto l3;
 
-				pfree(members);
+				/* otherwise, we're good */
+				require_sleep = false;
 			}
 		}
 		else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask))
@@ -4397,6 +4330,28 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 
 	if (require_sleep)
 	{
+		/*
+		 * Acquire tuple lock to establish our priority for the tuple.
+		 * LockTuple will release us when we are next-in-line for the tuple.
+		 * We must do this even if we are share-locking.
+		 *
+		 * If we are forced to "start over" below, we keep the tuple lock;
+		 * this arranges that we stay at the head of the line while rechecking
+		 * tuple state.
+		 */
+		if (!heap_acquire_tuplock(relation, tid, mode, wait_policy,
+								  &have_tuple_lock))
+		{
+			/*
+			 * This can only happen if wait_policy is Skip and the lock
+			 * couldn't be obtained.
+			 */
+			result = HeapTupleWouldBlock;
+			/* recovery code expects to have buffer lock held */
+			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			goto failed;
+		}
+
 		if (infomask & HEAP_XMAX_IS_MULTI)
 		{
 			MultiXactStatus status = get_mxact_status_for_lock(mode, false);
@@ -4713,6 +4668,48 @@ heap_lock_tuple(Relation relation, HeapTuple tuple,
 	return HeapTupleMayBeUpdated;
 }
 
+/*
+ * Acquire heavyweight lock on the given tuple, in preparation for acquiring
+ * its normal, Xmax-based tuple lock.
+ *
+ * have_tuple_lock is an input and output parameter: on input, it indicates
+ * whether the lock has previously been acquired (and this function does
+ * nothing in that case). If this function returns success, have_tuple_lock
+ * has been flipped to true.
+ *
+ * Returns false if it was unable to obtain the lock; this can only happen if
+ * wait_policy is Skip.
+ */
+static bool
+heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
+					 LockWaitPolicy wait_policy, bool *have_tuple_lock)
+{
+	if (*have_tuple_lock)
+		return true;
+
+	switch (wait_policy)
+	{
+		case LockWaitBlock:
+			LockTupleTuplock(relation, tid, mode);
+			break;
+
+		case LockWaitSkip:
+			if (!ConditionalLockTupleTuplock(relation, tid, mode))
+				return false;
+			break;
+
+		case LockWaitError:
+			if (!ConditionalLockTupleTuplock(relation, tid, mode))
+				ereport(ERROR,
+						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+						 errmsg("could not obtain lock on row in relation \"%s\"",
+								RelationGetRelationName(relation))));
+			break;
+	}
+	*have_tuple_lock = true;
+
+	return true;
+}
+
 /*
  * Given an original set of Xmax and infomask, and a transaction (identified by
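As a side note on the new helper's contract: the following is a minimal, self-contained C sketch of the same acquire-once, three-policy idiom, transplanted onto a pthread mutex. Everything in it (acquire_once, WaitPolicy, the mutex itself) is invented for illustration; PostgreSQL's heavyweight lock manager is not a mutex, and exit() merely stands in for ereport(ERROR).

```c
/* Illustrative analogy only; build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

typedef enum { WAIT_BLOCK, WAIT_SKIP, WAIT_ERROR } WaitPolicy;

/*
 * Acquire the lock unless *have_lock says we already hold it, honoring the
 * wait policy; returns false only under WAIT_SKIP, like heap_acquire_tuplock.
 */
static bool
acquire_once(pthread_mutex_t *lock, WaitPolicy policy, bool *have_lock)
{
	if (*have_lock)
		return true;			/* idempotent: never re-acquire */

	switch (policy)
	{
		case WAIT_BLOCK:
			pthread_mutex_lock(lock);	/* sleep until available */
			break;
		case WAIT_SKIP:
			if (pthread_mutex_trylock(lock) != 0)
				return false;	/* caller maps this to "would block" */
			break;
		case WAIT_ERROR:
			if (pthread_mutex_trylock(lock) != 0)
			{
				fprintf(stderr, "could not obtain lock\n");
				exit(1);		/* stand-in for ereport(ERROR) */
			}
			break;
	}
	*have_lock = true;
	return true;
}

int
main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	bool		have_lock = false;

	acquire_once(&lock, WAIT_BLOCK, &have_lock);
	/* a second call is a no-op because have_lock is already set */
	if (acquire_once(&lock, WAIT_SKIP, &have_lock))
		printf("still next-in-line; no reacquisition attempted\n");

	pthread_mutex_unlock(&lock);
	return 0;
}
```

The have_tuple_lock in/out flag is what lets callers "start over" (the goto l3 retry paths) without surrendering their place in the queue: once the lock is acquired, every later call through the helper is a no-op until the caller releases it and clears the flag.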
@@ -6091,6 +6088,70 @@ HeapTupleGetUpdateXid(HeapTupleHeader tuple)
 									 tuple->t_infomask);
 }
 
+/*
+ * Does the given multixact conflict with the current transaction grabbing a
+ * tuple lock of the given strength?
+ *
+ * The passed infomask pairs up with the given multixact in the tuple header.
+ */
+static bool
+DoesMultiXactIdConflict(MultiXactId multi, uint16 infomask,
+						LockTupleMode lockmode)
+{
+	bool		allow_old;
+	int			nmembers;
+	MultiXactMember *members;
+	bool		result = false;
+
+	allow_old = !(infomask & HEAP_LOCK_MASK) && HEAP_XMAX_IS_LOCKED_ONLY(infomask);
+	nmembers = GetMultiXactIdMembers(multi, &members, allow_old,
+									 HEAP_XMAX_IS_LOCKED_ONLY(infomask));
+	if (nmembers >= 0)
+	{
+		int			i;
+
+		for (i = 0; i < nmembers; i++)
+		{
+			TransactionId memxid;
+			LockTupleMode memlockmode;
+
+			memlockmode = LOCKMODE_from_mxstatus(members[i].status);
+			/* ignore members that don't conflict with the lock we want */
+			if (!DoLockModesConflict(memlockmode, lockmode))
+				continue;
+
+			/* ignore members from current xact */
+			memxid = members[i].xid;
+			if (TransactionIdIsCurrentTransactionId(memxid))
+				continue;
+
+			if (ISUPDATE_from_mxstatus(members[i].status))
+			{
+				/* ignore aborted updaters */
+				if (TransactionIdDidAbort(memxid))
+					continue;
+			}
+			else
+			{
+				/* ignore lockers-only that are no longer in progress */
+				if (!TransactionIdIsInProgress(memxid))
+					continue;
+			}
+
+			/*
+			 * Whatever remains are either live lockers that conflict with
+			 * our wanted lock, or updaters that are not aborted. Those
+			 * conflict with what we want, so return true.
+			 */
+			result = true;
+			break;
+		}
+		pfree(members);
+	}
+
+	return result;
+}
+
 /*
  * Do_MultiXactIdWait
  *		Actual implementation for the two functions below.
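Since DoesMultiXactIdConflict's filtering rule is the subtle part of this commit, here is a hedged, standalone C sketch of the same decision logic with the transaction machinery stubbed out. The Member type, the xid_* helpers, and modes_conflict are inventions for illustration; modes_conflict only approximates the tuple-lock conflict table, where the real code consults DoLockModesConflict on LOCKMODE_from_mxstatus results.

```c
/* Standalone sketch; compile with: cc -std=c99 conflict.c */
#include <stdbool.h>
#include <stdio.h>

typedef enum { MODE_KEYSHARE, MODE_SHARE, MODE_NOKEYEXCL, MODE_EXCL } LockMode;

typedef struct
{
	int			xid;		/* member transaction, simplified to an int */
	LockMode	mode;		/* lock strength implied by its member status */
	bool		is_update;	/* updater rather than locker-only */
} Member;

/* hard-coded stand-ins for the real transaction-status checks */
static bool xid_is_mine(int xid)     { return xid == 100; }
static bool xid_did_abort(int xid)   { return xid == 200; }
static bool xid_in_progress(int xid) { return xid == 300; }

/*
 * Approximation of the tuple-lock conflict table: KeyShare conflicts only
 * with Exclusive; among the others, everything conflicts except Share/Share.
 */
static bool
modes_conflict(LockMode a, LockMode b)
{
	if (a == MODE_KEYSHARE || b == MODE_KEYSHARE)
		return a == MODE_EXCL || b == MODE_EXCL;
	return !(a == MODE_SHARE && b == MODE_SHARE);
}

/* the per-member filter at the heart of DoesMultiXactIdConflict */
static bool
member_conflicts(const Member *m, LockMode wanted)
{
	if (!modes_conflict(m->mode, wanted))
		return false;					/* compatible lock strengths */
	if (xid_is_mine(m->xid))
		return false;					/* never conflict with ourselves */
	if (m->is_update)
		return !xid_did_abort(m->xid);	/* only aborted updaters ignorable */
	return xid_in_progress(m->xid);		/* finished lockers ignorable */
}

int
main(void)
{
	Member		members[] = {
		{300, MODE_KEYSHARE, false},	/* live key-share locker */
		{300, MODE_EXCL, false},		/* live exclusive locker */
		{200, MODE_EXCL, true},			/* aborted updater */
	};

	for (int i = 0; i < 3; i++)
		printf("member %d conflicts with NoKeyExclusive: %s\n", i,
			   member_conflicts(&members[i], MODE_NOKEYEXCL) ? "yes" : "no");
	return 0;							/* prints: no, yes, no */
}
```

The asymmetry between the last two checks mirrors the function's comments: a committed updater still conflicts (the update really happened), so only aborted updaters may be ignored, whereas a locker-only member's lock vanishes when its transaction ends either way, so any locker no longer in progress may be ignored.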
