Skip to content

Commit 7db285a

Browse files
committed
Fix stale-pointer problem in fast-path locking logic.
When acquiring a lock in fast-path mode, we must reset the locallock object's lock and proclock fields to NULL. They are not necessarily that way to start with, because the locallock could be left over from a failed lock acquisition attempt earlier in the transaction. Failure to do this led to all sorts of interesting misbehaviors when LockRelease tried to clean up no-longer-related lock and proclock objects in shared memory. Per report from Dan Wood. In passing, modify LockRelease to elog not just Assert if it doesn't find lock and proclock objects for a formerly fast-path lock, matching the code in FastPathGetRelationLockEntry and LockRefindAndRelease. This isn't a bug but it will help in diagnosing any future bugs in this area. Also, modify FastPathTransferRelationLocks and FastPathGetRelationLockEntry to break out of their loops over the fastpath array once they've found the sole matching entry. This was inconsistently done in some search loops and not others. Improve assorted related comments, too. Back-patch to 9.2 where the fast-path mechanism was introduced.
1 parent 89ba815 commit 7db285a

File tree

2 files changed

+73
-30
lines changed

2 files changed

+73
-30
lines changed

src/backend/storage/lmgr/lock.c

Lines changed: 56 additions & 27 deletions
Original file line number · Diff line number · Diff line change
@@ -788,14 +788,16 @@ LockAcquireExtended(const LOCKTAG *locktag,
788788
}
789789

790790
/*
791-
* Emit a WAL record if acquisition of this lock needs to be replayed in a
792-
* standby server. Only AccessExclusiveLocks can conflict with lock types
793-
* that read-only transactions can acquire in a standby server.
791+
* Prepare to emit a WAL record if acquisition of this lock needs to be
792+
* replayed in a standby server.
794793
*
795-
* Make sure this definition matches the one in
796-
* GetRunningTransactionLocks().
794+
* Here we prepare to log; after lock is acquired we'll issue log record.
795+
* This arrangement simplifies error recovery in case the preparation step
796+
* fails.
797797
*
798-
* First we prepare to log, then after lock acquired we issue log record.
798+
* Only AccessExclusiveLocks can conflict with lock types that read-only
799+
* transactions can acquire in a standby server. Make sure this definition
800+
* matches the one in GetRunningTransactionLocks().
799801
*/
800802
if (lockmode >= AccessExclusiveLock &&
801803
locktag->locktag_type == LOCKTAG_RELATION &&
@@ -816,8 +818,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
816818
* lock type on a relation we have already locked using the fast-path, but
817819
* for now we don't worry about that case either.
818820
*/
819-
if (EligibleForRelationFastPath(locktag, lockmode)
820-
&& FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
821+
if (EligibleForRelationFastPath(locktag, lockmode) &&
822+
FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
821823
{
822824
uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode);
823825
bool acquired;
@@ -837,6 +839,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
837839
LWLockRelease(MyProc->backendLock);
838840
if (acquired)
839841
{
842+
/*
843+
* The locallock might contain stale pointers to some old shared
844+
* objects; we MUST reset these to null before considering the
845+
* lock to be acquired via fast-path.
846+
*/
847+
locallock->lock = NULL;
848+
locallock->proclock = NULL;
840849
GrantLockLocal(locallock, owner);
841850
return LOCKACQUIRE_OK;
842851
}
@@ -877,7 +886,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
877886
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
878887

879888
/*
880-
* Find or create a proclock entry with this tag
889+
* Find or create lock and proclock entries with this tag
890+
*
891+
* Note: if the locallock object already existed, it might have a pointer
892+
* to the lock already ... but we should not assume that that pointer is
893+
* valid, since a lock object with zero hold and request counts can go
894+
* away anytime. So we have to use SetupLockInTable() to recompute the
895+
* lock and proclock pointers, even if they're already set.
881896
*/
882897
proclock = SetupLockInTable(lockMethodTable, MyProc, locktag,
883898
hashcode, lockmode);
@@ -1010,7 +1025,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
10101025
LWLockRelease(partitionLock);
10111026

10121027
/*
1013-
* Emit a WAL record if acquisition of this lock need to be replayed in a
1028+
* Emit a WAL record if acquisition of this lock needs to be replayed in a
10141029
* standby server.
10151030
*/
10161031
if (log_lock)
@@ -1049,11 +1064,6 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
10491064

10501065
/*
10511066
* Find or create a lock with this tag.
1052-
*
1053-
* Note: if the locallock object already existed, it might have a pointer
1054-
* to the lock already ... but we probably should not assume that that
1055-
* pointer is valid, since a lock object with no locks can go away
1056-
* anytime.
10571067
*/
10581068
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
10591069
(const void *) locktag,
@@ -1821,8 +1831,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
18211831
return TRUE;
18221832

18231833
/* Attempt fast release of any lock eligible for the fast path. */
1824-
if (EligibleForRelationFastPath(locktag, lockmode)
1825-
&& FastPathLocalUseCount > 0)
1834+
if (EligibleForRelationFastPath(locktag, lockmode) &&
1835+
FastPathLocalUseCount > 0)
18261836
{
18271837
bool released;
18281838

@@ -1852,30 +1862,33 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
18521862
* Normally, we don't need to re-find the lock or proclock, since we kept
18531863
* their addresses in the locallock table, and they couldn't have been
18541864
* removed while we were holding a lock on them. But it's possible that
1855-
* the locks have been moved to the main hash table by another backend, in
1856-
* which case we might need to go look them up after all.
1865+
* the lock was taken fast-path and has since been moved to the main hash
1866+
* table by another backend, in which case we will need to look up the
1867+
* objects here. We assume the lock field is NULL if so.
18571868
*/
18581869
lock = locallock->lock;
18591870
if (!lock)
18601871
{
18611872
PROCLOCKTAG proclocktag;
1862-
bool found;
18631873

18641874
Assert(EligibleForRelationFastPath(locktag, lockmode));
18651875
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
18661876
(const void *) locktag,
18671877
locallock->hashcode,
18681878
HASH_FIND,
1869-
&found);
1870-
Assert(found && lock != NULL);
1879+
NULL);
1880+
if (!lock)
1881+
elog(ERROR, "failed to re-find shared lock object");
18711882
locallock->lock = lock;
18721883

18731884
proclocktag.myLock = lock;
18741885
proclocktag.myProc = MyProc;
18751886
locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
18761887
(void *) &proclocktag,
1877-
HASH_FIND, &found);
1878-
Assert(found);
1888+
HASH_FIND,
1889+
NULL);
1890+
if (!locallock->proclock)
1891+
elog(ERROR, "failed to re-find shared proclock object");
18791892
}
18801893
LOCK_PRINT("LockRelease: found", lock, lockmode);
18811894
proclock = locallock->proclock;
@@ -1956,7 +1969,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
19561969
* entries, then we scan the process's proclocks and get rid of those. We
19571970
* do this separately because we may have multiple locallock entries
19581971
* pointing to the same proclock, and we daren't end up with any dangling
1959-
* pointers.
1972+
* pointers. Fast-path locks are cleaned up during the locallock table
1973+
* scan, though.
19601974
*/
19611975
hash_seq_init(&status, LockMethodLocalHash);
19621976

@@ -2011,7 +2025,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
20112025

20122026
/*
20132027
* If the lock or proclock pointers are NULL, this lock was taken via
2014-
* the relation fast-path.
2028+
* the relation fast-path (and is not known to have been transferred).
20152029
*/
20162030
if (locallock->proclock == NULL || locallock->lock == NULL)
20172031
{
@@ -2025,7 +2039,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
20252039
/*
20262040
* If we don't currently hold the LWLock that protects our
20272041
* fast-path data structures, we must acquire it before attempting
2028-
* to release the lock via the fast-path.
2042+
* to release the lock via the fast-path. We will continue to
2043+
* hold the LWLock until we're done scanning the locallock table,
2044+
* unless we hit a transferred fast-path lock. (XXX is this
2045+
* really such a good idea? There could be a lot of entries ...)
20292046
*/
20302047
if (!have_fast_path_lwlock)
20312048
{
@@ -2070,6 +2087,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
20702087
RemoveLocalLock(locallock);
20712088
}
20722089

2090+
/* Done with the fast-path data structures */
20732091
if (have_fast_path_lwlock)
20742092
LWLockRelease(MyProc->backendLock);
20752093

@@ -2421,6 +2439,7 @@ FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
24212439
Assert(!result);
24222440
FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
24232441
result = true;
2442+
/* we continue iterating so as to update FastPathLocalUseCount */
24242443
}
24252444
if (FAST_PATH_GET_BITS(MyProc, f) != 0)
24262445
++FastPathLocalUseCount;
@@ -2506,6 +2525,9 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
25062525
FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode);
25072526
}
25082527
LWLockRelease(partitionLock);
2528+
2529+
/* No need to examine remaining slots. */
2530+
break;
25092531
}
25102532
LWLockRelease(proc->backendLock);
25112533
}
@@ -2516,6 +2538,8 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
25162538
* FastPathGetLockEntry
25172539
* Return the PROCLOCK for a lock originally taken via the fast-path,
25182540
* transferring it to the primary lock table if necessary.
2541+
*
2542+
* Note: caller takes care of updating the locallock object.
25192543
*/
25202544
static PROCLOCK *
25212545
FastPathGetRelationLockEntry(LOCALLOCK *locallock)
@@ -2559,6 +2583,9 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
25592583
FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
25602584

25612585
LWLockRelease(partitionLock);
2586+
2587+
/* No need to examine remaining slots. */
2588+
break;
25622589
}
25632590

25642591
LWLockRelease(MyProc->backendLock);
@@ -2731,6 +2758,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
27312758
*/
27322759
if (VirtualTransactionIdIsValid(vxid))
27332760
vxids[count++] = vxid;
2761+
2762+
/* No need to examine remaining slots. */
27342763
break;
27352764
}
27362765

src/include/storage/lock.h

Lines changed: 17 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -379,6 +379,20 @@ typedef struct PROCLOCK
379379
* shared memory. We also track the number of lock acquisitions per
380380
* ResourceOwner, so that we can release just those locks belonging to a
381381
* particular ResourceOwner.
382+
*
383+
* When holding a lock taken "normally", the lock and proclock fields always
384+
* point to the associated objects in shared memory. However, if we acquired
385+
* the lock via the fast-path mechanism, the lock and proclock fields are set
386+
* to NULL, since there probably aren't any such objects in shared memory.
387+
* (If the lock later gets promoted to normal representation, we may eventually
388+
* update our locallock's lock/proclock fields after finding the shared
389+
* objects.)
390+
*
391+
* Caution: a locallock object can be left over from a failed lock acquisition
392+
* attempt. In this case its lock/proclock fields are untrustworthy, since
393+
* the shared lock object is neither held nor awaited, and hence is available
394+
* to be reclaimed. If nLocks > 0 then these pointers must either be valid or
395+
* NULL, but when nLocks == 0 they should be considered garbage.
382396
*/
383397
typedef struct LOCALLOCKTAG
384398
{
@@ -404,13 +418,13 @@ typedef struct LOCALLOCK
404418
LOCALLOCKTAG tag; /* unique identifier of locallock entry */
405419

406420
/* data */
407-
LOCK *lock; /* associated LOCK object in shared mem */
408-
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
421+
LOCK *lock; /* associated LOCK object, if any */
422+
PROCLOCK *proclock; /* associated PROCLOCK object, if any */
409423
uint32 hashcode; /* copy of LOCKTAG's hash value */
410424
int64 nLocks; /* total number of times lock is held */
411425
int numLockOwners; /* # of relevant ResourceOwners */
412426
int maxLockOwners; /* allocated size of array */
413-
bool holdsStrongLockCount; /* bumped FastPathStrongRelatonLocks? */
427+
bool holdsStrongLockCount; /* bumped FastPathStrongRelationLocks */
414428
LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
415429
} LOCALLOCK;
416430

0 commit comments

Comments (0)