Skip to content

Commit d2a6aae

Browse files
committed
Merge branch 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fixes from Ingo Molnar: "An rtmutex (PI-futex) deadlock scenario fix, plus a locking documentation fix" * 'locking-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: futex: Handle early deadlock return correctly futex: Fix barrier comment
2 parents df3865f + 1a1fb98 commit d2a6aae

File tree

2 files changed

+52
-17
lines changed

2 files changed

+52
-17
lines changed

kernel/futex.c

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2221,11 +2221,11 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
22212221
* decrement the counter at queue_unlock() when some error has
22222222
* occurred and we don't end up adding the task to the list.
22232223
*/
2224-
hb_waiters_inc(hb);
2224+
hb_waiters_inc(hb); /* implies smp_mb(); (A) */
22252225

22262226
q->lock_ptr = &hb->lock;
22272227

2228-
spin_lock(&hb->lock); /* implies smp_mb(); (A) */
2228+
spin_lock(&hb->lock);
22292229
return hb;
22302230
}
22312231

@@ -2861,35 +2861,39 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
28612861
* and BUG when futex_unlock_pi() interleaves with this.
28622862
*
28632863
* Therefore acquire wait_lock while holding hb->lock, but drop the
2864-
* latter before calling rt_mutex_start_proxy_lock(). This still fully
2865-
* serializes against futex_unlock_pi() as that does the exact same
2866-
* lock handoff sequence.
2864+
* latter before calling __rt_mutex_start_proxy_lock(). This
2865+
* interleaves with futex_unlock_pi() -- which does a similar lock
2866+
* handoff -- such that the latter can observe the futex_q::pi_state
2867+
* before __rt_mutex_start_proxy_lock() is done.
28672868
*/
28682869
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
28692870
spin_unlock(q.lock_ptr);
2871+
/*
2872+
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
2873+
* such that futex_unlock_pi() is guaranteed to observe the waiter when
2874+
* it sees the futex_q::pi_state.
2875+
*/
28702876
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
28712877
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
28722878

28732879
if (ret) {
28742880
if (ret == 1)
28752881
ret = 0;
2876-
2877-
spin_lock(q.lock_ptr);
2878-
goto no_block;
2882+
goto cleanup;
28792883
}
28802884

2881-
28822885
if (unlikely(to))
28832886
hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
28842887

28852888
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
28862889

2890+
cleanup:
28872891
spin_lock(q.lock_ptr);
28882892
/*
2889-
* If we failed to acquire the lock (signal/timeout), we must
2893+
* If we failed to acquire the lock (deadlock/signal/timeout), we must
28902894
* first acquire the hb->lock before removing the lock from the
2891-
* rt_mutex waitqueue, such that we can keep the hb and rt_mutex
2892-
* wait lists consistent.
2895+
* rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
2896+
* lists consistent.
28932897
*
28942898
* In particular; it is important that futex_unlock_pi() can not
28952899
* observe this inconsistency.
@@ -3013,6 +3017,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
30133017
* there is no point where we hold neither; and therefore
30143018
* wake_futex_pi() must observe a state consistent with what we
30153019
* observed.
3020+
*
3021+
* In particular; this forces __rt_mutex_start_proxy() to
3022+
* complete such that we're guaranteed to observe the
3023+
* rt_waiter. Also see the WARN in wake_futex_pi().
30163024
*/
30173025
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
30183026
spin_unlock(&hb->lock);

kernel/locking/rtmutex.c

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,12 +1726,33 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
17261726
rt_mutex_set_owner(lock, NULL);
17271727
}
17281728

1729+
/**
1730+
* __rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1731+
* @lock: the rt_mutex to take
1732+
* @waiter: the pre-initialized rt_mutex_waiter
1733+
* @task: the task to prepare
1734+
*
1735+
* Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1736+
* detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1737+
*
1738+
* NOTE: does _NOT_ remove the @waiter on failure; must either call
1739+
* rt_mutex_wait_proxy_lock() or rt_mutex_cleanup_proxy_lock() after this.
1740+
*
1741+
* Returns:
1742+
* 0 - task blocked on lock
1743+
* 1 - acquired the lock for task, caller should wake it up
1744+
* <0 - error
1745+
*
1746+
* Special API call for PI-futex support.
1747+
*/
17291748
int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17301749
struct rt_mutex_waiter *waiter,
17311750
struct task_struct *task)
17321751
{
17331752
int ret;
17341753

1754+
lockdep_assert_held(&lock->wait_lock);
1755+
17351756
if (try_to_take_rt_mutex(lock, task, NULL))
17361757
return 1;
17371758

@@ -1749,9 +1770,6 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17491770
ret = 0;
17501771
}
17511772

1752-
if (unlikely(ret))
1753-
remove_waiter(lock, waiter);
1754-
17551773
debug_rt_mutex_print_deadlock(waiter);
17561774

17571775
return ret;
@@ -1763,12 +1781,18 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17631781
* @waiter: the pre-initialized rt_mutex_waiter
17641782
* @task: the task to prepare
17651783
*
1784+
* Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock
1785+
* detection. It does not wait, see rt_mutex_wait_proxy_lock() for that.
1786+
*
1787+
* NOTE: unlike __rt_mutex_start_proxy_lock this _DOES_ remove the @waiter
1788+
* on failure.
1789+
*
17661790
* Returns:
17671791
* 0 - task blocked on lock
17681792
* 1 - acquired the lock for task, caller should wake it up
17691793
* <0 - error
17701794
*
1771-
* Special API call for FUTEX_REQUEUE_PI support.
1795+
* Special API call for PI-futex support.
17721796
*/
17731797
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17741798
struct rt_mutex_waiter *waiter,
@@ -1778,6 +1802,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17781802

17791803
raw_spin_lock_irq(&lock->wait_lock);
17801804
ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
1805+
if (unlikely(ret))
1806+
remove_waiter(lock, waiter);
17811807
raw_spin_unlock_irq(&lock->wait_lock);
17821808

17831809
return ret;
@@ -1845,7 +1871,8 @@ int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
18451871
* @lock: the rt_mutex we were woken on
18461872
* @waiter: the pre-initialized rt_mutex_waiter
18471873
*
1848-
* Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
1874+
* Attempt to clean up after a failed __rt_mutex_start_proxy_lock() or
1875+
* rt_mutex_wait_proxy_lock().
18491876
*
18501877
* Unless we acquired the lock; we're still enqueued on the wait-list and can
18511878
* in fact still be granted ownership until we're removed. Therefore we can

0 commit comments

Comments
 (0)