Skip to content

Commit 56222b2

Browse files
Peter Zijlstra authored and KAGA-KOKO committed
futex: Drop hb->lock before enqueueing on the rtmutex
When PREEMPT_RT_FULL does the spinlock -> rt_mutex substitution the PI chain code will (falsely) report a deadlock and BUG. The problem is that it holds hb->lock (now an rt_mutex) while doing task_blocks_on_rt_mutex on the futex's pi_state::rtmutex. This, when interleaved just right with futex_unlock_pi() leads it to believe to see an AB-BA deadlock.

  Task1 (holds rt_mutex,          Task2 (does FUTEX_LOCK_PI)
         does FUTEX_UNLOCK_PI)

                                  lock hb->lock
                                  lock rt_mutex (as per start_proxy)
  lock hb->lock

Which is a trivial AB-BA. It is not an actual deadlock, because it won't be holding hb->lock by the time it actually blocks on the rt_mutex, but the chainwalk code doesn't know that and it would be a nightmare to handle this gracefully.

To avoid this problem, do the same as in futex_unlock_pi() and drop hb->lock after acquiring wait_lock. This still fully serializes against futex_unlock_pi(), since adding to the wait_list does the very same lock dance, and removing it holds both locks.

Aside from solving the RT problem this makes the lock and unlock mechanism symmetric and reduces the hb->lock held time.

Reported-and-tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Suggested-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: juri.lelli@arm.com Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170322104152.161341537@infradead.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
1 parent bebe5b5 commit 56222b2

File tree

3 files changed

+52
-30
lines changed

3 files changed

+52
-30
lines changed

kernel/futex.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2654,20 +2654,33 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
26542654
goto no_block;
26552655
}
26562656

2657+
rt_mutex_init_waiter(&rt_waiter);
2658+
26572659
/*
2658-
* We must add ourselves to the rt_mutex waitlist while holding hb->lock
2659-
* such that the hb and rt_mutex wait lists match.
2660+
* On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not
2661+
* hold it while doing rt_mutex_start_proxy(), because then it will
2662+
* include hb->lock in the blocking chain, even though we'll not in
2663+
* fact hold it while blocking. This will lead it to report -EDEADLK
2664+
* and BUG when futex_unlock_pi() interleaves with this.
2665+
*
2666+
* Therefore acquire wait_lock while holding hb->lock, but drop the
2667+
* latter before calling rt_mutex_start_proxy_lock(). This still fully
2668+
* serializes against futex_unlock_pi() as that does the exact same
2669+
* lock handoff sequence.
26602670
*/
2661-
rt_mutex_init_waiter(&rt_waiter);
2662-
ret = rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
2671+
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
2672+
spin_unlock(q.lock_ptr);
2673+
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
2674+
raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
2675+
26632676
if (ret) {
26642677
if (ret == 1)
26652678
ret = 0;
26662679

2680+
spin_lock(q.lock_ptr);
26672681
goto no_block;
26682682
}
26692683

2670-
spin_unlock(q.lock_ptr);
26712684

26722685
if (unlikely(to))
26732686
hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
@@ -2680,6 +2693,9 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
26802693
* first acquire the hb->lock before removing the lock from the
26812694
* rt_mutex waitqueue, such that we can keep the hb and rt_mutex
26822695
* wait lists consistent.
2696+
*
2697+
* In particular; it is important that futex_unlock_pi() can not
2698+
* observe this inconsistency.
26832699
*/
26842700
if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
26852701
ret = 0;
@@ -2791,10 +2807,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
27912807

27922808
get_pi_state(pi_state);
27932809
/*
2794-
* Since modifying the wait_list is done while holding both
2795-
* hb->lock and wait_lock, holding either is sufficient to
2796-
* observe it.
2797-
*
27982810
* By taking wait_lock while still holding hb->lock, we ensure
27992811
* there is no point where we hold neither; and therefore
28002812
* wake_futex_pi() must observe a state consistent with what we

kernel/locking/rtmutex.c

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1669,31 +1669,14 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
16691669
rt_mutex_set_owner(lock, NULL);
16701670
}
16711671

1672-
/**
1673-
* rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1674-
* @lock: the rt_mutex to take
1675-
* @waiter: the pre-initialized rt_mutex_waiter
1676-
* @task: the task to prepare
1677-
*
1678-
* Returns:
1679-
* 0 - task blocked on lock
1680-
* 1 - acquired the lock for task, caller should wake it up
1681-
* <0 - error
1682-
*
1683-
* Special API call for FUTEX_REQUEUE_PI support.
1684-
*/
1685-
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1672+
int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
16861673
struct rt_mutex_waiter *waiter,
16871674
struct task_struct *task)
16881675
{
16891676
int ret;
16901677

1691-
raw_spin_lock_irq(&lock->wait_lock);
1692-
1693-
if (try_to_take_rt_mutex(lock, task, NULL)) {
1694-
raw_spin_unlock_irq(&lock->wait_lock);
1678+
if (try_to_take_rt_mutex(lock, task, NULL))
16951679
return 1;
1696-
}
16971680

16981681
/* We enforce deadlock detection for futexes */
16991682
ret = task_blocks_on_rt_mutex(lock, waiter, task,
@@ -1712,13 +1695,37 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
17121695
if (unlikely(ret))
17131696
remove_waiter(lock, waiter);
17141697

1715-
raw_spin_unlock_irq(&lock->wait_lock);
1716-
17171698
debug_rt_mutex_print_deadlock(waiter);
17181699

17191700
return ret;
17201701
}
17211702

1703+
/**
1704+
* rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1705+
* @lock: the rt_mutex to take
1706+
* @waiter: the pre-initialized rt_mutex_waiter
1707+
* @task: the task to prepare
1708+
*
1709+
* Returns:
1710+
* 0 - task blocked on lock
1711+
* 1 - acquired the lock for task, caller should wake it up
1712+
* <0 - error
1713+
*
1714+
* Special API call for FUTEX_REQUEUE_PI support.
1715+
*/
1716+
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1717+
struct rt_mutex_waiter *waiter,
1718+
struct task_struct *task)
1719+
{
1720+
int ret;
1721+
1722+
raw_spin_lock_irq(&lock->wait_lock);
1723+
ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
1724+
raw_spin_unlock_irq(&lock->wait_lock);
1725+
1726+
return ret;
1727+
}
1728+
17221729
/**
17231730
* rt_mutex_next_owner - return the next owner of the lock
17241731
*

kernel/locking/rtmutex_common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
104104
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
105105
struct task_struct *proxy_owner);
106106
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
107+
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
108+
struct rt_mutex_waiter *waiter,
109+
struct task_struct *task);
107110
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
108111
struct rt_mutex_waiter *waiter,
109112
struct task_struct *task);

0 commit comments

Comments
 (0)