Skip to content

Commit 7039760

Browse files
committed
Fix issues around the "variable" support in the lwlock infrastructure.
The lwlock scalability work introduced two race conditions into the lwlock variable support provided for xlog.c. First, and harmlessly on most platforms, it set/read the variable without the spinlock in some places. Secondly, due to the removal of the spinlock, it was possible that a backend missed changes to the variable's state if it changed in the wrong moment because checking the lock's state, the variable's state and the queuing are not protected by a single spinlock acquisition anymore. To fix first move resetting the variable's from LWLockAcquireWithVar to WALInsertLockRelease, via a new function LWLockReleaseClearVar. That prevents issues around waiting for a variable's value to change when a new locker has acquired the lock, but not yet set the value. Secondly re-check that the variable hasn't changed after enqueing, that prevents the issue that the lock has been released and already re-acquired by the time the woken up backend checks for the lock's state. Reported-By: Jeff Janes Analyzed-By: Heikki Linnakangas Reviewed-By: Heikki Linnakangas Discussion: 5592DB35.2060401@iki.fi Backpatch: 9.5, where the lwlock scalability went in
1 parent f69b4b9 commit 7039760

File tree

3 files changed

+129
-100
lines changed

3 files changed

+129
-100
lines changed

src/backend/access/transam/xlog.c

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,9 +1408,7 @@ WALInsertLockAcquire(void)
14081408
* The insertingAt value is initially set to 0, as we don't know our
14091409
* insert location yet.
14101410
*/
1411-
immed = LWLockAcquireWithVar(&WALInsertLocks[MyLockNo].l.lock,
1412-
&WALInsertLocks[MyLockNo].l.insertingAt,
1413-
0);
1411+
immed = LWLockAcquire(&WALInsertLocks[MyLockNo].l.lock, LW_EXCLUSIVE);
14141412
if (!immed)
14151413
{
14161414
/*
@@ -1435,26 +1433,28 @@ WALInsertLockAcquireExclusive(void)
14351433
int i;
14361434

14371435
/*
1438-
* When holding all the locks, we only update the last lock's insertingAt
1439-
* indicator. The others are set to 0xFFFFFFFFFFFFFFFF, which is higher
1440-
* than any real XLogRecPtr value, to make sure that no-one blocks waiting
1441-
* on those.
1436+
* When holding all the locks, all but the last lock's insertingAt
1437+
* indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1438+
* XLogRecPtr value, to make sure that no-one blocks waiting on those.
14421439
*/
14431440
for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
14441441
{
1445-
LWLockAcquireWithVar(&WALInsertLocks[i].l.lock,
1446-
&WALInsertLocks[i].l.insertingAt,
1447-
PG_UINT64_MAX);
1442+
LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
1443+
LWLockUpdateVar(&WALInsertLocks[i].l.lock,
1444+
&WALInsertLocks[i].l.insertingAt,
1445+
PG_UINT64_MAX);
14481446
}
1449-
LWLockAcquireWithVar(&WALInsertLocks[i].l.lock,
1450-
&WALInsertLocks[i].l.insertingAt,
1451-
0);
1447+
/* Variable value reset to 0 at release */
1448+
LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
14521449

14531450
holdingAllLocks = true;
14541451
}
14551452

14561453
/*
14571454
* Release our insertion lock (or locks, if we're holding them all).
1455+
*
1456+
* NB: Reset all variables to 0, so they cause LWLockWaitForVar to block the
1457+
* next time the lock is acquired.
14581458
*/
14591459
static void
14601460
WALInsertLockRelease(void)
@@ -1464,13 +1464,17 @@ WALInsertLockRelease(void)
14641464
int i;
14651465

14661466
for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1467-
LWLockRelease(&WALInsertLocks[i].l.lock);
1467+
LWLockReleaseClearVar(&WALInsertLocks[i].l.lock,
1468+
&WALInsertLocks[i].l.insertingAt,
1469+
0);
14681470

14691471
holdingAllLocks = false;
14701472
}
14711473
else
14721474
{
1473-
LWLockRelease(&WALInsertLocks[MyLockNo].l.lock);
1475+
LWLockReleaseClearVar(&WALInsertLocks[MyLockNo].l.lock,
1476+
&WALInsertLocks[MyLockNo].l.insertingAt,
1477+
0);
14741478
}
14751479
}
14761480

src/backend/storage/lmgr/lwlock.c

Lines changed: 109 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,15 @@
1010
* locking should be done with the full lock manager --- which depends on
1111
* LWLocks to protect its shared state.
1212
*
13-
* In addition to exclusive and shared modes, lightweight locks can be used
14-
* to wait until a variable changes value. The variable is initially set
15-
* when the lock is acquired with LWLockAcquireWithVar, and can be updated
13+
* In addition to exclusive and shared modes, lightweight locks can be used to
14+
* wait until a variable changes value. The variable is initially not set
15+
* when the lock is acquired with LWLockAcquire, i.e. it remains set to the
16+
* value it was set to when the lock was released last, and can be updated
1617
* without releasing the lock by calling LWLockUpdateVar. LWLockWaitForVar
17-
* waits for the variable to be updated, or until the lock is free. The
18-
* meaning of the variable is up to the caller, the lightweight lock code
19-
* just assigns and compares it.
18+
* waits for the variable to be updated, or until the lock is free. When
19+
* releasing the lock with LWLockReleaseClearVar() the value can be set to an
20+
* appropriate value for a free lock. The meaning of the variable is up to
21+
* the caller, the lightweight lock code just assigns and compares it.
2022
*
2123
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
2224
* Portions Copyright (c) 1994, Regents of the University of California
@@ -150,9 +152,6 @@ static LWLockHandle held_lwlocks[MAX_SIMUL_LWLOCKS];
150152
static int lock_addin_request = 0;
151153
static bool lock_addin_request_allowed = true;
152154

153-
static inline bool LWLockAcquireCommon(LWLock *l, LWLockMode mode,
154-
uint64 *valptr, uint64 val);
155-
156155
#ifdef LWLOCK_STATS
157156
typedef struct lwlock_stats_key
158157
{
@@ -899,25 +898,7 @@ LWLockDequeueSelf(LWLock *lock)
899898
* Side effect: cancel/die interrupts are held off until lock release.
900899
*/
901900
bool
902-
LWLockAcquire(LWLock *l, LWLockMode mode)
903-
{
904-
return LWLockAcquireCommon(l, mode, NULL, 0);
905-
}
906-
907-
/*
908-
* LWLockAcquireWithVar - like LWLockAcquire, but also sets *valptr = val
909-
*
910-
* The lock is always acquired in exclusive mode with this function.
911-
*/
912-
bool
913-
LWLockAcquireWithVar(LWLock *l, uint64 *valptr, uint64 val)
914-
{
915-
return LWLockAcquireCommon(l, LW_EXCLUSIVE, valptr, val);
916-
}
917-
918-
/* internal function to implement LWLockAcquire and LWLockAcquireWithVar */
919-
static inline bool
920-
LWLockAcquireCommon(LWLock *lock, LWLockMode mode, uint64 *valptr, uint64 val)
901+
LWLockAcquire(LWLock *lock, LWLockMode mode)
921902
{
922903
PGPROC *proc = MyProc;
923904
bool result = true;
@@ -1064,10 +1045,6 @@ LWLockAcquireCommon(LWLock *lock, LWLockMode mode, uint64 *valptr, uint64 val)
10641045
result = false;
10651046
}
10661047

1067-
/* If there's a variable associated with this lock, initialize it */
1068-
if (valptr)
1069-
*valptr = val;
1070-
10711048
TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), T_ID(lock), mode);
10721049

10731050
/* Add lock to list of locks held by this backend */
@@ -1258,6 +1235,71 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
12581235
return !mustwait;
12591236
}
12601237

1238+
/*
1239+
* Does the lwlock in its current state need to wait for the variable value to
1240+
* change?
1241+
*
1242+
* If we don't need to wait, and it's because the value of the variable has
1243+
* changed, store the current value in newval.
1244+
*
1245+
* *result is set to true if the lock was free, and false otherwise.
1246+
*/
1247+
static bool
1248+
LWLockConflictsWithVar(LWLock *lock,
1249+
uint64 *valptr, uint64 oldval, uint64 *newval,
1250+
bool *result)
1251+
{
1252+
bool mustwait;
1253+
uint64 value;
1254+
#ifdef LWLOCK_STATS
1255+
lwlock_stats *lwstats;
1256+
1257+
lwstats = get_lwlock_stats_entry(lock);
1258+
#endif
1259+
1260+
/*
1261+
* Test first to see if it the slot is free right now.
1262+
*
1263+
* XXX: the caller uses a spinlock before this, so we don't need a memory
1264+
* barrier here as far as the current usage is concerned. But that might
1265+
* not be safe in general.
1266+
*/
1267+
mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1268+
1269+
if (!mustwait)
1270+
{
1271+
*result = true;
1272+
return false;
1273+
}
1274+
1275+
*result = false;
1276+
1277+
/*
1278+
* Read value using spinlock as we can't rely on atomic 64 bit
1279+
* reads/stores. TODO: On platforms with a way to do atomic 64 bit
1280+
* reads/writes the spinlock could be optimized away.
1281+
*/
1282+
#ifdef LWLOCK_STATS
1283+
lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
1284+
#else
1285+
SpinLockAcquire(&lock->mutex);
1286+
#endif
1287+
value = *valptr;
1288+
SpinLockRelease(&lock->mutex);
1289+
1290+
if (value != oldval)
1291+
{
1292+
mustwait = false;
1293+
*newval = value;
1294+
}
1295+
else
1296+
{
1297+
mustwait = true;
1298+
}
1299+
1300+
return mustwait;
1301+
}
1302+
12611303
/*
12621304
* LWLockWaitForVar - Wait until lock is free, or a variable is updated.
12631305
*
@@ -1268,11 +1310,6 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
12681310
* matches oldval, returns false and sets *newval to the current value in
12691311
* *valptr.
12701312
*
1271-
* It's possible that the lock holder releases the lock, but another backend
1272-
* acquires it again before we get a chance to observe that the lock was
1273-
* momentarily released. We wouldn't need to wait for the new lock holder,
1274-
* but we cannot distinguish that case, so we will have to wait.
1275-
*
12761313
* Note: this function ignores shared lock holders; if the lock is held
12771314
* in shared mode, returns 'true'.
12781315
*/
@@ -1290,16 +1327,6 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
12901327

12911328
PRINT_LWDEBUG("LWLockWaitForVar", lock, LW_WAIT_UNTIL_FREE);
12921329

1293-
/*
1294-
* Quick test first to see if it the slot is free right now.
1295-
*
1296-
* XXX: the caller uses a spinlock before this, so we don't need a memory
1297-
* barrier here as far as the current usage is concerned. But that might
1298-
* not be safe in general.
1299-
*/
1300-
if ((pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) == 0)
1301-
return true;
1302-
13031330
/*
13041331
* Lock out cancel/die interrupts while we sleep on the lock. There is no
13051332
* cleanup mechanism to remove us from the wait queue if we got
@@ -1313,39 +1340,9 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
13131340
for (;;)
13141341
{
13151342
bool mustwait;
1316-
uint64 value;
1317-
1318-
mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1319-
1320-
if (mustwait)
1321-
{
1322-
/*
1323-
* Perform comparison using spinlock as we can't rely on atomic 64
1324-
* bit reads/stores.
1325-
*/
1326-
#ifdef LWLOCK_STATS
1327-
lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
1328-
#else
1329-
SpinLockAcquire(&lock->mutex);
1330-
#endif
13311343

1332-
/*
1333-
* XXX: We can significantly optimize this on platforms with 64bit
1334-
* atomics.
1335-
*/
1336-
value = *valptr;
1337-
if (value != oldval)
1338-
{
1339-
result = false;
1340-
mustwait = false;
1341-
*newval = value;
1342-
}
1343-
else
1344-
mustwait = true;
1345-
SpinLockRelease(&lock->mutex);
1346-
}
1347-
else
1348-
mustwait = false;
1344+
mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1345+
&result);
13491346

13501347
if (!mustwait)
13511348
break; /* the lock was free or value didn't match */
@@ -1354,7 +1351,9 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
13541351
* Add myself to wait queue. Note that this is racy, somebody else
13551352
* could wakeup before we're finished queuing. NB: We're using nearly
13561353
* the same twice-in-a-row lock acquisition protocol as
1357-
* LWLockAcquire(). Check its comments for details.
1354+
* LWLockAcquire(). Check its comments for details. The only
1355+
* difference is that we also have to check the variable's values when
1356+
* checking the state of the lock.
13581357
*/
13591358
LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
13601359

@@ -1365,12 +1364,13 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
13651364
pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);
13661365

13671366
/*
1368-
* We're now guaranteed to be woken up if necessary. Recheck the
1369-
* lock's state.
1367+
* We're now guaranteed to be woken up if necessary. Recheck the lock
1368+
* and variables state.
13701369
*/
1371-
mustwait = (pg_atomic_read_u32(&lock->state) & LW_VAL_EXCLUSIVE) != 0;
1370+
mustwait = LWLockConflictsWithVar(lock, valptr, oldval, newval,
1371+
&result);
13721372

1373-
/* Ok, lock is free after we queued ourselves. Undo queueing. */
1373+
/* Ok, no conflict after we queued ourselves. Undo queueing. */
13741374
if (!mustwait)
13751375
{
13761376
LOG_LWDEBUG("LWLockWaitForVar", lock, "free, undoing queue");
@@ -1587,6 +1587,31 @@ LWLockRelease(LWLock *lock)
15871587
RESUME_INTERRUPTS();
15881588
}
15891589

1590+
/*
1591+
* LWLockReleaseClearVar - release a previously acquired lock, reset variable
1592+
*/
1593+
void
1594+
LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
1595+
{
1596+
#ifdef LWLOCK_STATS
1597+
lwlock_stats *lwstats;
1598+
1599+
lwstats = get_lwlock_stats_entry(lock);
1600+
lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
1601+
#else
1602+
SpinLockAcquire(&lock->mutex);
1603+
#endif
1604+
/*
1605+
* Set the variable's value before releasing the lock, that prevents race
1606+
* a race condition wherein a new locker acquires the lock, but hasn't yet
1607+
* set the variables value.
1608+
*/
1609+
*valptr = val;
1610+
SpinLockRelease(&lock->mutex);
1611+
1612+
LWLockRelease(lock);
1613+
}
1614+
15901615

15911616
/*
15921617
* LWLockReleaseAll - release all currently-held locks

src/include/storage/lwlock.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,10 +182,10 @@ extern bool LWLockAcquire(LWLock *lock, LWLockMode mode);
182182
extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode);
183183
extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
184184
extern void LWLockRelease(LWLock *lock);
185+
extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
185186
extern void LWLockReleaseAll(void);
186187
extern bool LWLockHeldByMe(LWLock *lock);
187188

188-
extern bool LWLockAcquireWithVar(LWLock *lock, uint64 *valptr, uint64 val);
189189
extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
190190
extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
191191

0 commit comments

Comments
 (0)