Skip to content

Commit da791a6

Browse files
committed
futex: Cure exit race
Stefan reported, that the glibc tst-robustpi4 test case fails occasionally. That case creates the following race between sys_exit() and sys_futex_lock_pi(): CPU0 CPU1 sys_exit() sys_futex() do_exit() futex_lock_pi() exit_signals(tsk) No waiters: tsk->flags |= PF_EXITING; *uaddr == 0x00000PID mm_release(tsk) Set waiter bit exit_robust_list(tsk) { *uaddr = 0x80000PID; Set owner died attach_to_pi_owner() { *uaddr = 0xC0000000; tsk = get_task(PID); } if (!tsk->flags & PF_EXITING) { ... attach(); tsk->flags |= PF_EXITPIDONE; } else { if (!(tsk->flags & PF_EXITPIDONE)) return -EAGAIN; return -ESRCH; <--- FAIL } ESRCH is returned all the way to user space, which triggers the glibc test case assert. Returning ESRCH unconditionally is wrong here because the user space value has been changed by the exiting task to 0xC0000000, i.e. the FUTEX_OWNER_DIED bit is set and the futex PID value has been cleared. This is a valid state and the kernel has to handle it, i.e. taking the futex. Cure it by rereading the user space value when PF_EXITING and PF_EXITPIDONE is set in the task which 'owns' the futex. If the value has changed, let the kernel retry the operation, which includes all regular sanity checks and correctly handles the FUTEX_OWNER_DIED case. If it hasn't changed, then return ESRCH as there is no way to distinguish this case from malfunctioning user space. This happens when the exiting task did not have a robust list, the robust list was corrupted or the user space value in the futex was simply bogus. Reported-by: Stefan Liebler <stli@linux.ibm.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Darren Hart <dvhart@infradead.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Sasha Levin <sashal@kernel.org> Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=200467 Link: https://lkml.kernel.org/r/20181210152311.986181245@linutronix.de
1 parent 1a9430d commit da791a6

File tree

1 file changed

+63
-6
lines changed

1 file changed

+63
-6
lines changed

kernel/futex.c

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,11 +1148,65 @@ static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
11481148
return ret;
11491149
}
11501150

1151+
static int handle_exit_race(u32 __user *uaddr, u32 uval,
1152+
struct task_struct *tsk)
1153+
{
1154+
u32 uval2;
1155+
1156+
/*
1157+
* If PF_EXITPIDONE is not yet set, then try again.
1158+
*/
1159+
if (tsk && !(tsk->flags & PF_EXITPIDONE))
1160+
return -EAGAIN;
1161+
1162+
/*
1163+
* Reread the user space value to handle the following situation:
1164+
*
1165+
* CPU0 CPU1
1166+
*
1167+
* sys_exit() sys_futex()
1168+
* do_exit() futex_lock_pi()
1169+
* futex_lock_pi_atomic()
1170+
* exit_signals(tsk) No waiters:
1171+
* tsk->flags |= PF_EXITING; *uaddr == 0x00000PID
1172+
* mm_release(tsk) Set waiter bit
1173+
* exit_robust_list(tsk) { *uaddr = 0x80000PID;
1174+
* Set owner died attach_to_pi_owner() {
1175+
* *uaddr = 0xC0000000; tsk = get_task(PID);
1176+
* } if (!tsk->flags & PF_EXITING) {
1177+
* ... attach();
1178+
* tsk->flags |= PF_EXITPIDONE; } else {
1179+
* if (!(tsk->flags & PF_EXITPIDONE))
1180+
* return -EAGAIN;
1181+
* return -ESRCH; <--- FAIL
1182+
* }
1183+
*
1184+
* Returning ESRCH unconditionally is wrong here because the
1185+
* user space value has been changed by the exiting task.
1186+
*
1187+
* The same logic applies to the case where the exiting task is
1188+
* already gone.
1189+
*/
1190+
if (get_futex_value_locked(&uval2, uaddr))
1191+
return -EFAULT;
1192+
1193+
/* If the user space value has changed, try again. */
1194+
if (uval2 != uval)
1195+
return -EAGAIN;
1196+
1197+
/*
1198+
* The exiting task did not have a robust list, the robust list was
1199+
* corrupted or the user space value in *uaddr is simply bogus.
1200+
* Give up and tell user space.
1201+
*/
1202+
return -ESRCH;
1203+
}
1204+
11511205
/*
11521206
* Lookup the task for the TID provided from user space and attach to
11531207
* it after doing proper sanity checks.
11541208
*/
1155-
static int attach_to_pi_owner(u32 uval, union futex_key *key,
1209+
static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
11561210
struct futex_pi_state **ps)
11571211
{
11581212
pid_t pid = uval & FUTEX_TID_MASK;
@@ -1162,12 +1216,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
11621216
/*
11631217
* We are the first waiter - try to look up the real owner and attach
11641218
* the new pi_state to it, but bail out when TID = 0 [1]
1219+
*
1220+
* The !pid check is paranoid. None of the call sites should end up
1221+
* with pid == 0, but better safe than sorry. Let the caller retry
11651222
*/
11661223
if (!pid)
1167-
return -ESRCH;
1224+
return -EAGAIN;
11681225
p = find_get_task_by_vpid(pid);
11691226
if (!p)
1170-
return -ESRCH;
1227+
return handle_exit_race(uaddr, uval, NULL);
11711228

11721229
if (unlikely(p->flags & PF_KTHREAD)) {
11731230
put_task_struct(p);
@@ -1187,7 +1244,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
11871244
* set, we know that the task has finished the
11881245
* cleanup:
11891246
*/
1190-
int ret = (p->flags & PF_EXITPIDONE) ? -ESRCH : -EAGAIN;
1247+
int ret = handle_exit_race(uaddr, uval, p);
11911248

11921249
raw_spin_unlock_irq(&p->pi_lock);
11931250
put_task_struct(p);
@@ -1244,7 +1301,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval,
12441301
* We are the first waiter - try to look up the owner based on
12451302
* @uval and attach to it.
12461303
*/
1247-
return attach_to_pi_owner(uval, key, ps);
1304+
return attach_to_pi_owner(uaddr, uval, key, ps);
12481305
}
12491306

12501307
static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1352,7 +1409,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
13521409
* attach to the owner. If that fails, no harm done, we only
13531410
* set the FUTEX_WAITERS bit in the user space variable.
13541411
*/
1355-
return attach_to_pi_owner(uval, key, ps);
1412+
return attach_to_pi_owner(uaddr, newval, key, ps);
13561413
}
13571414

13581415
/**

0 commit comments

Comments
 (0)