Skip to content

Commit 024edb4

Browse files
committed
Fix stale-pointer problem in fast-path locking logic.
When acquiring a lock in fast-path mode, we must reset the locallock object's lock and proclock fields to NULL. They are not necessarily that way to start with, because the locallock could be left over from a failed lock acquisition attempt earlier in the transaction. Failure to do this led to all sorts of interesting misbehaviors when LockRelease tried to clean up no-longer-related lock and proclock objects in shared memory. Per report from Dan Wood. In passing, modify LockRelease to elog not just Assert if it doesn't find lock and proclock objects for a formerly fast-path lock, matching the code in FastPathGetRelationLockEntry and LockRefindAndRelease. This isn't a bug but it will help in diagnosing any future bugs in this area. Also, modify FastPathTransferRelationLocks and FastPathGetRelationLockEntry to break out of their loops over the fastpath array once they've found the sole matching entry. This was inconsistently done in some search loops and not others. Improve assorted related comments, too. Back-patch to 9.2 where the fast-path mechanism was introduced.
1 parent 0b132b9 commit 024edb4

File tree

2 files changed

+73
-30
lines changed

2 files changed

+73
-30
lines changed

src/backend/storage/lmgr/lock.c

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -773,14 +773,16 @@ LockAcquireExtended(const LOCKTAG *locktag,
773773
}
774774

775775
/*
776-
* Emit a WAL record if acquisition of this lock needs to be replayed in a
777-
* standby server. Only AccessExclusiveLocks can conflict with lock types
778-
* that read-only transactions can acquire in a standby server.
776+
* Prepare to emit a WAL record if acquisition of this lock needs to be
777+
* replayed in a standby server.
779778
*
780-
* Make sure this definition matches the one in
781-
* GetRunningTransactionLocks().
779+
* Here we prepare to log; after lock is acquired we'll issue log record.
780+
* This arrangement simplifies error recovery in case the preparation step
781+
* fails.
782782
*
783-
* First we prepare to log, then after lock acquired we issue log record.
783+
* Only AccessExclusiveLocks can conflict with lock types that read-only
784+
* transactions can acquire in a standby server. Make sure this definition
785+
* matches the one in GetRunningTransactionLocks().
784786
*/
785787
if (lockmode >= AccessExclusiveLock &&
786788
locktag->locktag_type == LOCKTAG_RELATION &&
@@ -801,8 +803,8 @@ LockAcquireExtended(const LOCKTAG *locktag,
801803
* lock type on a relation we have already locked using the fast-path, but
802804
* for now we don't worry about that case either.
803805
*/
804-
if (EligibleForRelationFastPath(locktag, lockmode)
805-
&& FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
806+
if (EligibleForRelationFastPath(locktag, lockmode) &&
807+
FastPathLocalUseCount < FP_LOCK_SLOTS_PER_BACKEND)
806808
{
807809
uint32 fasthashcode = FastPathStrongLockHashPartition(hashcode);
808810
bool acquired;
@@ -822,6 +824,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
822824
LWLockRelease(MyProc->backendLock);
823825
if (acquired)
824826
{
827+
/*
828+
* The locallock might contain stale pointers to some old shared
829+
* objects; we MUST reset these to null before considering the
830+
* lock to be acquired via fast-path.
831+
*/
832+
locallock->lock = NULL;
833+
locallock->proclock = NULL;
825834
GrantLockLocal(locallock, owner);
826835
return LOCKACQUIRE_OK;
827836
}
@@ -862,7 +871,13 @@ LockAcquireExtended(const LOCKTAG *locktag,
862871
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
863872

864873
/*
865-
* Find or create a proclock entry with this tag
874+
* Find or create lock and proclock entries with this tag
875+
*
876+
* Note: if the locallock object already existed, it might have a pointer
877+
* to the lock already ... but we should not assume that that pointer is
878+
* valid, since a lock object with zero hold and request counts can go
879+
* away anytime. So we have to use SetupLockInTable() to recompute the
880+
* lock and proclock pointers, even if they're already set.
866881
*/
867882
proclock = SetupLockInTable(lockMethodTable, MyProc, locktag,
868883
hashcode, lockmode);
@@ -995,7 +1010,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
9951010
LWLockRelease(partitionLock);
9961011

9971012
/*
998-
* Emit a WAL record if acquisition of this lock need to be replayed in a
1013+
* Emit a WAL record if acquisition of this lock needs to be replayed in a
9991014
* standby server.
10001015
*/
10011016
if (log_lock)
@@ -1034,11 +1049,6 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
10341049

10351050
/*
10361051
* Find or create a lock with this tag.
1037-
*
1038-
* Note: if the locallock object already existed, it might have a pointer
1039-
* to the lock already ... but we probably should not assume that that
1040-
* pointer is valid, since a lock object with no locks can go away
1041-
* anytime.
10421052
*/
10431053
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
10441054
(const void *) locktag,
@@ -1794,8 +1804,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
17941804
return TRUE;
17951805

17961806
/* Attempt fast release of any lock eligible for the fast path. */
1797-
if (EligibleForRelationFastPath(locktag, lockmode)
1798-
&& FastPathLocalUseCount > 0)
1807+
if (EligibleForRelationFastPath(locktag, lockmode) &&
1808+
FastPathLocalUseCount > 0)
17991809
{
18001810
bool released;
18011811

@@ -1825,30 +1835,33 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
18251835
* Normally, we don't need to re-find the lock or proclock, since we kept
18261836
* their addresses in the locallock table, and they couldn't have been
18271837
* removed while we were holding a lock on them. But it's possible that
1828-
* the locks have been moved to the main hash table by another backend, in
1829-
* which case we might need to go look them up after all.
1838+
* the lock was taken fast-path and has since been moved to the main hash
1839+
* table by another backend, in which case we will need to look up the
1840+
* objects here. We assume the lock field is NULL if so.
18301841
*/
18311842
lock = locallock->lock;
18321843
if (!lock)
18331844
{
18341845
PROCLOCKTAG proclocktag;
1835-
bool found;
18361846

18371847
Assert(EligibleForRelationFastPath(locktag, lockmode));
18381848
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
18391849
(const void *) locktag,
18401850
locallock->hashcode,
18411851
HASH_FIND,
1842-
&found);
1843-
Assert(found && lock != NULL);
1852+
NULL);
1853+
if (!lock)
1854+
elog(ERROR, "failed to re-find shared lock object");
18441855
locallock->lock = lock;
18451856

18461857
proclocktag.myLock = lock;
18471858
proclocktag.myProc = MyProc;
18481859
locallock->proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash,
18491860
(void *) &proclocktag,
1850-
HASH_FIND, &found);
1851-
Assert(found);
1861+
HASH_FIND,
1862+
NULL);
1863+
if (!locallock->proclock)
1864+
elog(ERROR, "failed to re-find shared proclock object");
18521865
}
18531866
LOCK_PRINT("LockRelease: found", lock, lockmode);
18541867
proclock = locallock->proclock;
@@ -1929,7 +1942,8 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
19291942
* entries, then we scan the process's proclocks and get rid of those. We
19301943
* do this separately because we may have multiple locallock entries
19311944
* pointing to the same proclock, and we daren't end up with any dangling
1932-
* pointers.
1945+
* pointers. Fast-path locks are cleaned up during the locallock table
1946+
* scan, though.
19331947
*/
19341948
hash_seq_init(&status, LockMethodLocalHash);
19351949

@@ -1983,7 +1997,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
19831997

19841998
/*
19851999
* If the lock or proclock pointers are NULL, this lock was taken via
1986-
* the relation fast-path.
2000+
* the relation fast-path (and is not known to have been transferred).
19872001
*/
19882002
if (locallock->proclock == NULL || locallock->lock == NULL)
19892003
{
@@ -1997,7 +2011,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
19972011
/*
19982012
* If we don't currently hold the LWLock that protects our
19992013
* fast-path data structures, we must acquire it before attempting
2000-
* to release the lock via the fast-path.
2014+
* to release the lock via the fast-path. We will continue to
2015+
* hold the LWLock until we're done scanning the locallock table,
2016+
* unless we hit a transferred fast-path lock. (XXX is this
2017+
* really such a good idea? There could be a lot of entries ...)
20012018
*/
20022019
if (!have_fast_path_lwlock)
20032020
{
@@ -2042,6 +2059,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
20422059
RemoveLocalLock(locallock);
20432060
}
20442061

2062+
/* Done with the fast-path data structures */
20452063
if (have_fast_path_lwlock)
20462064
LWLockRelease(MyProc->backendLock);
20472065

@@ -2352,6 +2370,7 @@ FastPathUnGrantRelationLock(Oid relid, LOCKMODE lockmode)
23522370
Assert(!result);
23532371
FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
23542372
result = true;
2373+
/* we continue iterating so as to update FastPathLocalUseCount */
23552374
}
23562375
if (FAST_PATH_GET_BITS(MyProc, f) != 0)
23572376
++FastPathLocalUseCount;
@@ -2437,6 +2456,9 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
24372456
FAST_PATH_CLEAR_LOCKMODE(proc, f, lockmode);
24382457
}
24392458
LWLockRelease(partitionLock);
2459+
2460+
/* No need to examine remaining slots. */
2461+
break;
24402462
}
24412463
LWLockRelease(proc->backendLock);
24422464
}
@@ -2447,6 +2469,8 @@ FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag
24472469
* FastPathGetLockEntry
24482470
* Return the PROCLOCK for a lock originally taken via the fast-path,
24492471
* transferring it to the primary lock table if necessary.
2472+
*
2473+
* Note: caller takes care of updating the locallock object.
24502474
*/
24512475
static PROCLOCK *
24522476
FastPathGetRelationLockEntry(LOCALLOCK *locallock)
@@ -2490,6 +2514,9 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
24902514
FAST_PATH_CLEAR_LOCKMODE(MyProc, f, lockmode);
24912515

24922516
LWLockRelease(partitionLock);
2517+
2518+
/* No need to examine remaining slots. */
2519+
break;
24932520
}
24942521

24952522
LWLockRelease(MyProc->backendLock);
@@ -2662,6 +2689,8 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
26622689
*/
26632690
if (VirtualTransactionIdIsValid(vxid))
26642691
vxids[count++] = vxid;
2692+
2693+
/* No need to examine remaining slots. */
26652694
break;
26662695
}
26672696

src/include/storage/lock.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,20 @@ typedef struct PROCLOCK
379379
* shared memory. We also track the number of lock acquisitions per
380380
* ResourceOwner, so that we can release just those locks belonging to a
381381
* particular ResourceOwner.
382+
*
383+
* When holding a lock taken "normally", the lock and proclock fields always
384+
* point to the associated objects in shared memory. However, if we acquired
385+
* the lock via the fast-path mechanism, the lock and proclock fields are set
386+
* to NULL, since there probably aren't any such objects in shared memory.
387+
* (If the lock later gets promoted to normal representation, we may eventually
388+
* update our locallock's lock/proclock fields after finding the shared
389+
* objects.)
390+
*
391+
* Caution: a locallock object can be left over from a failed lock acquisition
392+
* attempt. In this case its lock/proclock fields are untrustworthy, since
393+
* the shared lock object is neither held nor awaited, and hence is available
394+
* to be reclaimed. If nLocks > 0 then these pointers must either be valid or
395+
* NULL, but when nLocks == 0 they should be considered garbage.
382396
*/
383397
typedef struct LOCALLOCKTAG
384398
{
@@ -404,13 +418,13 @@ typedef struct LOCALLOCK
404418
LOCALLOCKTAG tag; /* unique identifier of locallock entry */
405419

406420
/* data */
407-
LOCK *lock; /* associated LOCK object in shared mem */
408-
PROCLOCK *proclock; /* associated PROCLOCK object in shmem */
421+
LOCK *lock; /* associated LOCK object, if any */
422+
PROCLOCK *proclock; /* associated PROCLOCK object, if any */
409423
uint32 hashcode; /* copy of LOCKTAG's hash value */
410424
int64 nLocks; /* total number of times lock is held */
411425
int numLockOwners; /* # of relevant ResourceOwners */
412426
int maxLockOwners; /* allocated size of array */
413-
bool holdsStrongLockCount; /* bumped FastPathStrongRelatonLocks? */
427+
bool holdsStrongLockCount; /* bumped FastPathStrongRelationLocks */
414428
LOCALLOCKOWNER *lockOwners; /* dynamically resizable array */
415429
} LOCALLOCK;
416430

0 commit comments

Comments
 (0)