Skip to content

Commit abd50b3

Browse files
oleg-nesterov and torvalds
authored and committed
wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition
wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy, this is even documented in 50b8d25 "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error but we can't rely on ptrace_reparented(), debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which was missed before: this transition can also race with reparent_leader() which doesn't reset ->exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. This was fixed by the previous commit, but it was a temporary hack. 1. Add the new exit_state, EXIT_TRACE. It means that the task is the traced zombie, debugger is going to detach and notify its natural parent. This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we can avoid the changes in proc/kgdb code, get_task_state() still reports "X (dead)" in this case. Note: with or without this change userspace can see Z -> X -> Z transition. Not really bad, but probably makes sense to fix. 2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD if we need to notify the ->real_parent. 3. Revert the previous hack in reparent_leader(), now that EXIT_DEAD is always the final state we can safely ignore such a task. 4. Change wait_consider_task() to check EXIT_TRACE separately and kill the racy and no longer needed ptrace_reparented() case. If ptrace == T an EXIT_TRACE thread should be simply ignored, the owner of this state is going to ptrace_unlink() this task. We can pretend that it was already removed from ->ptraced list. 
Otherwise we should skip this thread too but clear ->notask_error, we must be the natural parent and debugger is going to untrace and notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace" even if the task was already untraced. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reported-by: Jan Kratochvil <jan.kratochvil@redhat.com> Reported-by: Michal Schmidt <mschmidt@redhat.com> Tested-by: Michal Schmidt <mschmidt@redhat.com> Cc: Al Viro <viro@ZenIV.linux.org.uk> Cc: Lennart Poettering <lpoetter@redhat.com> Cc: Roland McGrath <roland@hack.frob.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent dfccbb5 commit abd50b3

File tree

2 files changed

+22
-29
lines changed

2 files changed

+22
-29
lines changed

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
212212
/* in tsk->exit_state */
213213
#define EXIT_ZOMBIE 16
214214
#define EXIT_DEAD 32
215+
#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
215216
/* in tsk->state again */
216217
#define TASK_DEAD 64
217218
#define TASK_WAKEKILL 128

kernel/exit.c

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -560,26 +560,19 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
560560
struct list_head *dead)
561561
{
562562
list_move_tail(&p->sibling, &p->real_parent->children);
563+
564+
if (p->exit_state == EXIT_DEAD)
565+
return;
563566
/*
564567
* If this is a threaded reparent there is no need to
565568
* notify anyone anything has happened.
566569
*/
567570
if (same_thread_group(p->real_parent, father))
568571
return;
569572

570-
/*
571-
* We don't want people slaying init.
572-
*
573-
* Note: we do this even if it is EXIT_DEAD, wait_task_zombie()
574-
* can change ->exit_state to EXIT_ZOMBIE. If this is the final
575-
* state, do_notify_parent() was already called and ->exit_signal
576-
* doesn't matter.
577-
*/
573+
/* We don't want people slaying init. */
578574
p->exit_signal = SIGCHLD;
579575

580-
if (p->exit_state == EXIT_DEAD)
581-
return;
582-
583576
/* If it has exited notify the new parent about this child's death. */
584577
if (!p->ptrace &&
585578
p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
@@ -1043,17 +1036,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
10431036
return wait_noreap_copyout(wo, p, pid, uid, why, status);
10441037
}
10451038

1039+
traced = ptrace_reparented(p);
10461040
/*
1047-
* Try to move the task's state to DEAD
1048-
* only one thread is allowed to do this:
1041+
* Move the task's state to DEAD/TRACE, only one thread can do this.
10491042
*/
1050-
state = xchg(&p->exit_state, EXIT_DEAD);
1051-
if (state != EXIT_ZOMBIE) {
1052-
BUG_ON(state != EXIT_DEAD);
1043+
state = traced ? EXIT_TRACE : EXIT_DEAD;
1044+
if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
10531045
return 0;
1054-
}
1055-
1056-
traced = ptrace_reparented(p);
10571046
/*
10581047
* It can be ptraced but not reparented, check
10591048
* thread_group_leader() to filter out sub-threads.
@@ -1114,7 +1103,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
11141103

11151104
/*
11161105
* Now we are sure this task is interesting, and no other
1117-
* thread can reap it because we set its state to EXIT_DEAD.
1106+
* thread can reap it because its state == DEAD/TRACE.
11181107
*/
11191108
read_unlock(&tasklist_lock);
11201109

@@ -1159,14 +1148,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
11591148
* If this is not a sub-thread, notify the parent.
11601149
* If parent wants a zombie, don't release it now.
11611150
*/
1151+
state = EXIT_DEAD;
11621152
if (thread_group_leader(p) &&
1163-
!do_notify_parent(p, p->exit_signal)) {
1164-
p->exit_state = EXIT_ZOMBIE;
1165-
p = NULL;
1166-
}
1153+
!do_notify_parent(p, p->exit_signal))
1154+
state = EXIT_ZOMBIE;
1155+
p->exit_state = state;
11671156
write_unlock_irq(&tasklist_lock);
11681157
}
1169-
if (p != NULL)
1158+
if (state == EXIT_DEAD)
11701159
release_task(p);
11711160

11721161
return retval;
@@ -1362,12 +1351,15 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
13621351
}
13631352

13641353
/* dead body doesn't have much to contribute */
1365-
if (unlikely(p->exit_state == EXIT_DEAD)) {
1354+
if (unlikely(p->exit_state == EXIT_DEAD))
1355+
return 0;
1356+
1357+
if (unlikely(p->exit_state == EXIT_TRACE)) {
13661358
/*
1367-
* But do not ignore this task until the tracer does
1368-
* wait_task_zombie()->do_notify_parent().
1359+
* ptrace == 0 means we are the natural parent. In this case
1360+
* we should clear notask_error, debugger will notify us.
13691361
*/
1370-
if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
1362+
if (likely(!ptrace))
13711363
wo->notask_error = 0;
13721364
return 0;
13731365
}

0 commit comments

Comments
 (0)