Skip to content

Commit 1d0dcb3

Browse files
Davidlohr Bueso authored and Ingo Molnar committed
futex: Implement lockless wakeups
Given the overall futex architecture, any chance of reducing hb->lock contention is welcome. In this particular case, using wake-queues to enable lockless wakeups addresses very real-world performance concerns, including soft-lockups when large numbers of tasks are blocked (which is not hard to find on large boxes, using just a handful of futexes).

At the lowest level, this patch can reduce the latency of a single thread attempting to acquire hb->lock in highly contended scenarios by up to 2x. At lower counts of nr_wake there are no regressions, confirming, of course, that the wake_q handling overhead is practically non-existent. For instance, while there is a fair amount of variation, the extended perf-bench wakeup benchmark shows, for a 20-core machine, the following average per-thread time to wake up its share of tasks:

    nr_thr    ms-before    ms-after
    16        0.0590       0.0215
    32        0.0396       0.0220
    48        0.0417       0.0182
    64        0.0536       0.0236
    80        0.0414       0.0097
    96        0.0672       0.0152

Naturally, this can cause spurious wakeups. However, there is no core code that cannot handle them afaict, and furthermore tglx does have the point that other events can already trigger them anyway.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 7675104 commit 1d0dcb3

File tree

1 file changed

+17
-16
lines changed

1 file changed

+17
-16
lines changed

kernel/futex.c

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,24 +1090,22 @@ static void __unqueue_futex(struct futex_q *q)
10901090

10911091
/*
10921092
* The hash bucket lock must be held when this is called.
1093-
* Afterwards, the futex_q must not be accessed.
1093+
* Afterwards, the futex_q must not be accessed. Callers
1094+
* must ensure to later call wake_up_q() for the actual
1095+
* wakeups to occur.
10941096
*/
1095-
static void wake_futex(struct futex_q *q)
1097+
static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
10961098
{
10971099
struct task_struct *p = q->task;
10981100

10991101
if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
11001102
return;
11011103

11021104
/*
1103-
* We set q->lock_ptr = NULL _before_ we wake up the task. If
1104-
* a non-futex wake up happens on another CPU then the task
1105-
* might exit and p would dereference a non-existing task
1106-
* struct. Prevent this by holding a reference on p across the
1107-
* wake up.
1105+
* Queue the task for later wakeup for after we've released
1106+
* the hb->lock. wake_q_add() grabs reference to p.
11081107
*/
1109-
get_task_struct(p);
1110-
1108+
wake_q_add(wake_q, p);
11111109
__unqueue_futex(q);
11121110
/*
11131111
* The waiting task can free the futex_q as soon as
@@ -1117,9 +1115,6 @@ static void wake_futex(struct futex_q *q)
11171115
*/
11181116
smp_wmb();
11191117
q->lock_ptr = NULL;
1120-
1121-
wake_up_state(p, TASK_NORMAL);
1122-
put_task_struct(p);
11231118
}
11241119

11251120
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -1217,6 +1212,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
12171212
struct futex_q *this, *next;
12181213
union futex_key key = FUTEX_KEY_INIT;
12191214
int ret;
1215+
WAKE_Q(wake_q);
12201216

12211217
if (!bitset)
12221218
return -EINVAL;
@@ -1244,13 +1240,14 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
12441240
if (!(this->bitset & bitset))
12451241
continue;
12461242

1247-
wake_futex(this);
1243+
mark_wake_futex(&wake_q, this);
12481244
if (++ret >= nr_wake)
12491245
break;
12501246
}
12511247
}
12521248

12531249
spin_unlock(&hb->lock);
1250+
wake_up_q(&wake_q);
12541251
out_put_key:
12551252
put_futex_key(&key);
12561253
out:
@@ -1269,6 +1266,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
12691266
struct futex_hash_bucket *hb1, *hb2;
12701267
struct futex_q *this, *next;
12711268
int ret, op_ret;
1269+
WAKE_Q(wake_q);
12721270

12731271
retry:
12741272
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
@@ -1320,7 +1318,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
13201318
ret = -EINVAL;
13211319
goto out_unlock;
13221320
}
1323-
wake_futex(this);
1321+
mark_wake_futex(&wake_q, this);
13241322
if (++ret >= nr_wake)
13251323
break;
13261324
}
@@ -1334,7 +1332,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
13341332
ret = -EINVAL;
13351333
goto out_unlock;
13361334
}
1337-
wake_futex(this);
1335+
mark_wake_futex(&wake_q, this);
13381336
if (++op_ret >= nr_wake2)
13391337
break;
13401338
}
@@ -1344,6 +1342,7 @@ futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
13441342

13451343
out_unlock:
13461344
double_unlock_hb(hb1, hb2);
1345+
wake_up_q(&wake_q);
13471346
out_put_keys:
13481347
put_futex_key(&key2);
13491348
out_put_key1:
@@ -1503,6 +1502,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
15031502
struct futex_pi_state *pi_state = NULL;
15041503
struct futex_hash_bucket *hb1, *hb2;
15051504
struct futex_q *this, *next;
1505+
WAKE_Q(wake_q);
15061506

15071507
if (requeue_pi) {
15081508
/*
@@ -1679,7 +1679,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
16791679
* woken by futex_unlock_pi().
16801680
*/
16811681
if (++task_count <= nr_wake && !requeue_pi) {
1682-
wake_futex(this);
1682+
mark_wake_futex(&wake_q, this);
16831683
continue;
16841684
}
16851685

@@ -1719,6 +1719,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
17191719
out_unlock:
17201720
free_pi_state(pi_state);
17211721
double_unlock_hb(hb1, hb2);
1722+
wake_up_q(&wake_q);
17221723
hb_waiters_dec(hb2);
17231724

17241725
/*

0 commit comments

Comments
 (0)