Skip to content

Commit 1cef115

Browse files
Peter Zijlstra authored and Ingo Molnar committed
kthread, sched/core: Fix kthread_parkme() (again...)
Gaurav reports that commit:

  85f1abe ("kthread, sched/wait: Fix kthread_parkme() completion issue")

isn't working for him, because of the following race:

> controller Thread                               CPUHP Thread
> takedown_cpu
> kthread_park
> kthread_parkme
> Set KTHREAD_SHOULD_PARK
>                                                 smpboot_thread_fn
>                                                 set Task interruptible
>
>
> wake_up_process
>  if (!(p->state & state))
>                 goto out;
>
>                                                 Kthread_parkme
>                                                 SET TASK_PARKED
>                                                 schedule
>                                                 raw_spin_lock(&rq->lock)
> ttwu_remote
> waiting for __task_rq_lock
>                                                 context_switch
>
>                                                 finish_lock_switch
>
>
>
>                                                 Case TASK_PARKED
>                                                 kthread_park_complete
>
>
> SET Running

Furthermore, Oleg noticed that the whole scheduler TASK_PARKED
handling is buggered because the TASK_DEAD thing is done with
preemption disabled, the current code can still complete early on
preemption :/

So basically revert that earlier fix and go with a variant of the
alternative mentioned in the commit. Promote TASK_PARKED to special
state to avoid the store-store issue on task->state leading to the
WARN in kthread_unpark() -> __kthread_bind().

But in addition, add wait_task_inactive() to kthread_park() to ensure
the task really is PARKED when we return from kthread_park(). This
avoids the whole kthread still gets migrated nonsense -- although it
would be really good to get this done differently.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 85f1abe ("kthread, sched/wait: Fix kthread_parkme() completion issue")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 3482d98 commit 1cef115

File tree

4 files changed

+36
-28
lines changed

4 files changed

+36
-28
lines changed

include/linux/kthread.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
6262
int kthread_park(struct task_struct *k);
6363
void kthread_unpark(struct task_struct *k);
6464
void kthread_parkme(void);
65-
void kthread_park_complete(struct task_struct *k);
6665

6766
int kthreadd(void *unused);
6867
extern struct task_struct *kthreadd_task;

include/linux/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ struct task_group;
118118
* the comment with set_special_state().
119119
*/
120120
#define is_special_task_state(state) \
121-
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
121+
((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
122122

123123
#define __set_current_state(state_value) \
124124
do { \

kernel/kthread.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
177177
static void __kthread_parkme(struct kthread *self)
178178
{
179179
for (;;) {
180-
set_current_state(TASK_PARKED);
180+
/*
181+
* TASK_PARKED is a special state; we must serialize against
182+
* possible pending wakeups to avoid store-store collisions on
183+
* task->state.
184+
*
185+
* Such a collision might possibly result in the task state
186+
* changing from TASK_PARKED and us failing the
187+
* wait_task_inactive() in kthread_park().
188+
*/
189+
set_special_state(TASK_PARKED);
181190
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
182191
break;
192+
193+
complete_all(&self->parked);
183194
schedule();
184195
}
185196
__set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
191202
}
192203
EXPORT_SYMBOL_GPL(kthread_parkme);
193204

194-
void kthread_park_complete(struct task_struct *k)
195-
{
196-
complete_all(&to_kthread(k)->parked);
197-
}
198-
199205
static int kthread(void *_create)
200206
{
201207
/* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
461467

462468
reinit_completion(&kthread->parked);
463469
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
470+
/*
471+
* __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
472+
*/
464473
wake_up_state(k, TASK_PARKED);
465474
}
466475
EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
487496
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
488497
if (k != current) {
489498
wake_up_process(k);
499+
/*
500+
* Wait for __kthread_parkme() to complete(), this means we
501+
* _will_ have TASK_PARKED and are about to call schedule().
502+
*/
490503
wait_for_completion(&kthread->parked);
504+
/*
505+
* Now wait for that schedule() to complete and the task to
506+
* get scheduled out.
507+
*/
508+
WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
491509
}
492510

493511
return 0;

kernel/sched/core.c

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
*/
88
#include "sched.h"
99

10-
#include <linux/kthread.h>
1110
#include <linux/nospec.h>
1211

1312
#include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
27242723
membarrier_mm_sync_core_before_usermode(mm);
27252724
mmdrop(mm);
27262725
}
2727-
if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
2728-
switch (prev_state) {
2729-
case TASK_DEAD:
2730-
if (prev->sched_class->task_dead)
2731-
prev->sched_class->task_dead(prev);
2726+
if (unlikely(prev_state == TASK_DEAD)) {
2727+
if (prev->sched_class->task_dead)
2728+
prev->sched_class->task_dead(prev);
27322729

2733-
/*
2734-
* Remove function-return probe instances associated with this
2735-
* task and put them back on the free list.
2736-
*/
2737-
kprobe_flush_task(prev);
2738-
2739-
/* Task is done with its stack. */
2740-
put_task_stack(prev);
2730+
/*
2731+
* Remove function-return probe instances associated with this
2732+
* task and put them back on the free list.
2733+
*/
2734+
kprobe_flush_task(prev);
27412735

2742-
put_task_struct(prev);
2743-
break;
2736+
/* Task is done with its stack. */
2737+
put_task_stack(prev);
27442738

2745-
case TASK_PARKED:
2746-
kthread_park_complete(prev);
2747-
break;
2748-
}
2739+
put_task_struct(prev);
27492740
}
27502741

27512742
tick_nohz_task_switch();

0 commit comments

Comments
 (0)