Commit d32e838

Fix stale-pointer problem in fast-path locking logic.
When acquiring a lock in fast-path mode, we must reset the locallock object's lock and proclock fields to NULL. They are not necessarily that way to start with, because the locallock could be left over from a failed lock acquisition attempt earlier in the transaction. Failure to do this led to all sorts of interesting misbehaviors when LockRelease tried to clean up no-longer-related lock and proclock objects in shared memory. Per report from Dan Wood.

In passing, modify LockRelease to elog not just Assert if it doesn't find lock and proclock objects for a formerly fast-path lock, matching the code in FastPathGetRelationLockEntry and LockRefindAndRelease. This isn't a bug but it will help in diagnosing any future bugs in this area.

Also, modify FastPathTransferRelationLocks and FastPathGetRelationLockEntry to break out of their loops over the fastpath array once they've found the sole matching entry. This was inconsistently done in some search loops and not others. Improve assorted related comments, too.

Back-patch to 9.2 where the fast-path mechanism was introduced.
1 parent 08302a3 commit d32e838
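At its core, the patch adds two assignments in the fast-path branch of LockAcquireExtended(). The snippet below is a condensed excerpt of that branch as patched, taken from the lock.c diff further down (surrounding code elided):

        if (acquired)
        {
            /*
             * The locallock might contain stale pointers to some old shared
             * objects; we MUST reset these to null before considering the
             * lock to be acquired via fast-path.
             */
            locallock->lock = NULL;
            locallock->proclock = NULL;
            GrantLockLocal(locallock, owner);
            return LOCKACQUIRE_OK;
        }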

File tree

2 files changed (+73, -30 lines):

src/backend/storage/lmgr/lock.c
src/include/storage/lock.h

src/backend/storage/lmgr/lock.c

Lines changed: 56 additions & 27 deletions
@@ -788,14 +788,16 @@ LockAcquireExtended(const LOCKTAG *locktag,
     }
 
     /*
-     * Emit a WAL record if acquisition of this lock needs to be replayed in a
-     * standby server. Only AccessExclusiveLocks can conflict with lock types
-     * that read-only transactions can acquire in a standby server.
+     * Prepare to emit a WAL record if acquisition of this lock needs to be
+     * replayed in a standby server.
      *
-     * Make sure this definition matches the one in
-     * GetRunningTransactionLocks().
+     * Here we prepare to log; after lock is acquired we'll issue log record.
+     * This arrangement simplifies error recovery in case the preparation step
+     * fails.
      *
-     * First we prepare to log, then after lock acquired we issue log record.
+     * Only AccessExclusiveLocks can conflict with lock types that read-only
+     * transactions can acquire in a standby server. Make sure this definition
+     * matches the one in GetRunningTransactionLocks().
      */
     if (lockmode >= AccessExclusiveLock &&
         locktag->locktag_type == LOCKTAG_RELATION &&
@@ -816,8 +818,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
      * lock type on a relation we have already locked using the fast-path, but
      * for now we don't worry about that case either.
      */
-    if (EligibleForRelationFastPath(locktag, lockmode)
-        && FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
+    if (EligibleForRelationFastPath(locktag, lockmode) &&
+        FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
     {
         uint32      fasthashcode = FastPathStrongLockHashPartition(hashcode);
         bool        acquired;
@@ -837,6 +839,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
         LWLockRelease(MyProc->backendLock);
         if (acquired)
         {
+            /*
+             * The locallock might contain stale pointers to some old shared
+             * objects; we MUST reset these to null before considering the
+             * lock to be acquired via fast-path.
+             */
+            locallock->lock = NULL;
+            locallock->proclock = NULL;
             GrantLockLocal(locallock, owner);
             return LOCKACQUIRE_OK;
         }
@@ -877,7 +886,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
     LWLockAcquire(partitionLock, LW_EXCLUSIVE);
 
     /*
-     * Find or create a proclock entry with this tag
+     * Find or create lock and proclock entries with this tag
+     *
+     * Note: if the locallock object already existed, it might have a pointer
+     * to the lock already ... but we should not assume that that pointer is
+     * valid, since a lock object with zero hold and request counts can go
+     * away anytime.  So we have to use SetupLockInTable() to recompute the
+     * lock and proclock pointers, even if they're already set.
      */
     proclock = SetupLockInTable(lockMethodTable, MyProc, locktag,
                                 hashcode, lockmode);
@@ -1010,7 +1025,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
     LWLockRelease(partitionLock);
 
     /*
-     * Emit a WAL record if acquisition of this lock need to be replayed in a
+     * Emit a WAL record if acquisition of this lock needs to be replayed in a
      * standby server.
      */
     if (log_lock)
@@ -1049,11 +1064,6 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
 
     /*
      * Find or create a lock with this tag.
-     *
-     * Note: if the locallock object already existed, it might have a pointer
-     * to the lock already ... but we probably should not assume that that
-     * pointer is valid, since a lock object with no locks can go away
-     * anytime.
      */
     lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
                                                 (const void *) locktag,
@@ -1822,8 +1832,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
         return TRUE;
 
     /* Attempt fast release of any lock eligible for the fast path. */
-    if (EligibleForRelationFastPath(locktag, lockmode)
-        && FastPathLocalUseCount > 0)
+    if (EligibleForRelationFastPath(locktag, lockmode) &&
+        FastPathLocalUseCount > 0)
     {
         bool        released;
 
@@ -1853,30 +1863,33 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
      * Normally, we don't need to re-find the lock or proclock, since we kept
      * their addresses in the locallock table, and they couldn't have been
      * removed while we were holding a lock on them.  But it's possible that
-     * the locks have been moved to the main hash table by another backend, in
-     * which case we might need to go look them up after all.
+     * the lock was taken fast-path and has since been moved to the main hash
+     * table by another backend, in which case we will need to look up the
+     * objects here.  We assume the lock field is NULL if so.
      */
     lock = locallock->lock;
     if (!lock)
     {
         PROCLOCKTAG proclocktag;
-        bool        found;
 
         Assert(EligibleForRelationFastPath(locktag, lockmode));
         lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
                                                     (const void *) locktag,
                                                     locallock->hashcode,
                                                     HASH_FIND,
-                                                    &found);
-        Assert(found && lock != NULL);
+                                                    NULL);
+        if (!lock)
+            elog(ERROR, "failed to re-find shared lock object");
         locallock->lock = lock;
 
         proclocktag.myLock = lock;
         proclocktag.myProc = MyProc;
         locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
                                                        (void *) &proclocktag,
-                                                       HASH_FIND, &found);
-        Assert(found);
+                                                       HASH_FIND,
+                                                       NULL);
+        if (!locallock->proclock)
+            elog(ERROR, "failed to re-find shared proclock object");
     }
     LOCK_PRINT("LockRelease: found", lock, lockmode);
     proclock = locallock->proclock;
@@ -1957,7 +1970,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
      * entries, then we scan the process's proclocks and get rid of those. We
      * do this separately because we may have multiple locallock entries
      * pointing to the same proclock, and we daren't end up with any dangling
-     * pointers.
+     * pointers.  Fast-path locks are cleaned up during the locallock table
+     * scan, though.
      */
     hash_seq_init(&status, LockMethodLocalHash);
 
@@ -2012,7 +2026,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
 
         /*
          * If the lock or proclock pointers are NULL, this lock was taken via
-         * the relation fast-path.
+         * the relation fast-path (and is not known to have been transferred).
          */
         if (locallock->proclock == NULL || locallock->lock == NULL)
         {
@@ -2026,7 +2040,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
             /*
              * If we don't currently hold the LWLock that protects our
              * fast-path data structures, we must acquire it before attempting
-             * to release the lock via the fast-path.
+             * to release the lock via the fast-path.  We will continue to
+             * hold the LWLock until we're done scanning the locallock table,
+             * unless we hit a transferred fast-path lock.  (XXX is this
+             * really such a good idea?  There could be a lot of entries ...)
              */
             if (!have_fast_path_lwlock)
             {
@@ -2071,6 +2088,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
         RemoveLocalLock(locallock);
     }
 
+    /* Done with the fast-path data structures */
     if (have_fast_path_lwlock)
         LWLockRelease(MyProc->backendLock);
 
@@ -2422,6 +2440,7 @@ FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
             Assert(!result);
             FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
             result = true;
+            /* we continue iterating so as to update FastPathLocalUseCount */
         }
         if (FAST_PATH_GET_BITS(MyProc, f) != 0)
             ++FastPathLocalUseCount;
@@ -2507,6 +2526,9 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
                 FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode);
             }
             LWLockRelease(partitionLock);
+
+            /* No need to examine remaining slots. */
+            break;
         }
         LWLockRelease(proc->backendLock);
     }
@@ -2517,6 +2539,8 @@
  * FastPathGetLockEntry
  *      Return the PROCLOCK for a lock originally taken via the fast-path,
  *      transferring it to the primary lock table if necessary.
+ *
+ * Note: caller takes care of updating the locallock object.
  */
 static PROCLOCK *
 FastPathGetRelationLockEntry(LOCALLOCK *locallock)
@@ -2560,6 +2584,9 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
         FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
 
         LWLockRelease(partitionLock);
+
+        /* No need to examine remaining slots. */
+        break;
     }
 
     LWLockRelease(MyProc->backendLock);
@@ -2732,6 +2759,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
              */
             if (VirtualTransactionIdIsValid(vxid))
                 vxids[count++] = vxid;
+
+            /* No need to examine remaining slots. */
             break;
         }
 
src/include/storage/lock.h

Lines changed: 17 additions & 3 deletions
@@ -379,6 +379,20 @@ typedef struct PROCLOCK
 * shared memory.  We also track the number of lock acquisitions per
 * ResourceOwner, so that we can release just those locks belonging to a
 * particular ResourceOwner.
+*
+* When holding a lock taken "normally", the lock and proclock fields always
+* point to the associated objects in shared memory.  However, if we acquired
+* the lock via the fast-path mechanism, the lock and proclock fields are set
+* to NULL, since there probably aren't any such objects in shared memory.
+* (If the lock later gets promoted to normal representation, we may eventually
+* update our locallock's lock/proclock fields after finding the shared
+* objects.)
+*
+* Caution: a locallock object can be left over from a failed lock acquisition
+* attempt.  In this case its lock/proclock fields are untrustworthy, since
+* the shared lock object is neither held nor awaited, and hence is available
+* to be reclaimed.  If nLocks > 0 then these pointers must either be valid or
+* NULL, but when nLocks == 0 they should be considered garbage.
 */
 typedef struct LOCALLOCKTAG
 {
@@ -404,13 +418,13 @@
     LOCALLOCKTAG tag;           /* unique identifier of locallock entry */
 
     /* data */
-    LOCK       *lock;           /* associated LOCK object in shared mem */
-    PROCLOCK   *proclock;       /* associated PROCLOCK object in shmem */
+    LOCK       *lock;           /* associated LOCK object, if any */
+    PROCLOCK   *proclock;       /* associated PROCLOCK object, if any */
     uint32      hashcode;       /* copy of LOCKTAG's hash value */
     int64       nLocks;         /* total number of times lock is held */
     int         numLockOwners;  /* # of relevant ResourceOwners */
     int         maxLockOwners;  /* allocated size of array */
-    bool        holdsStrongLockCount;   /* bumped FastPathStrongRelatonLocks? */
+    bool        holdsStrongLockCount;   /* bumped FastPathStrongRelationLocks */
     LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
 } LOCALLOCK;
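The practical consequence of the new LOCALLOCK comments for callers is that a NULL lock pointer may simply mean the lock was taken via the fast path, so the shared objects must be re-found before use. A condensed sketch of that pattern, mirroring the LockRelease() hunk above (surrounding code elided):

    lock = locallock->lock;
    if (!lock)
    {
        /* Lock was taken fast-path; re-find the shared LOCK object. */
        lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
                                                    (const void *) locktag,
                                                    locallock->hashcode,
                                                    HASH_FIND,
                                                    NULL);
        if (!lock)
            elog(ERROR, "failed to re-find shared lock object");
        locallock->lock = lock;
    }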
