Commit cca26e8

Kirill Tkhai authored and Ingo Molnar committed
sched: Teach scheduler to understand TASK_ON_RQ_MIGRATING state
This is a new p->on_rq state which will be used to indicate that a
task is in the process of migrating between two RQs. It allows us to
get rid of double_rq_lock(), which we previously used to change the
rq of a queued task.

Let's consider an example. To move a task between src_rq and dst_rq
we will do the following:

	raw_spin_lock(&src_rq->lock);

	/* p is a task which is queued on src_rq */
	p = ...;

	dequeue_task(src_rq, p, 0);
	p->on_rq = TASK_ON_RQ_MIGRATING;
	set_task_cpu(p, dst_cpu);
	raw_spin_unlock(&src_rq->lock);

	/*
	 * Both RQs are unlocked here.
	 * Task p is dequeued from src_rq
	 * but its on_rq value is not zero.
	 */

	raw_spin_lock(&dst_rq->lock);
	p->on_rq = TASK_ON_RQ_QUEUED;
	enqueue_task(dst_rq, p, 0);
	raw_spin_unlock(&dst_rq->lock);

While p->on_rq is TASK_ON_RQ_MIGRATING, the task is considered to be
"migrating", and other scheduler actions on it are not available to
parallel callers; such a caller spins until the migration is
completed. The unavailable actions are changing of CPU affinity,
changing of priority, etc.; in other words, all the functionality
which used to require task_rq(p)->lock before (and is related to the
task).

To implement TASK_ON_RQ_MIGRATING support, we primarily rely on the
following fact: most scheduler users (from which we are protecting a
migrating task) use task_rq_lock() and __task_rq_lock() to take the
lock of task_rq(p). These primitives know that the task's CPU may
change, and they spin until they hold the lock of the right RQ. We
add one more condition to them, so they also spin until the migration
is finished.

Signed-off-by: Kirill Tkhai <ktkhai@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Turner <pjt@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1408528062.23412.88.camel@tkhai
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent da0c1e6 commit cca26e8
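
As a sketch only, the sequence from the commit message could be
packaged as a single helper. move_task_between_rqs() is a hypothetical
name, not part of this patch; the body just restates the protocol with
the locking invariants spelled out in comments:

	/*
	 * Hypothetical helper (illustration only, not in this patch):
	 * migrate a queued task from src_rq to dst_cpu via the new
	 * intermediate TASK_ON_RQ_MIGRATING state.
	 */
	static void move_task_between_rqs(struct task_struct *p,
					  struct rq *src_rq, int dst_cpu)
	{
		struct rq *dst_rq = cpu_rq(dst_cpu);

		raw_spin_lock(&src_rq->lock);
		dequeue_task(src_rq, p, 0);
		/* From here, task_rq_lock() callers spin instead of proceeding. */
		p->on_rq = TASK_ON_RQ_MIGRATING;
		set_task_cpu(p, dst_cpu);
		raw_spin_unlock(&src_rq->lock);

		/* Neither rq->lock is held; p is queued on no runqueue. */

		raw_spin_lock(&dst_rq->lock);
		p->on_rq = TASK_ON_RQ_QUEUED;	/* spinning callers may proceed */
		enqueue_task(dst_rq, p, 0);
		raw_spin_unlock(&dst_rq->lock);
	}

The point of the intermediate state is that the window where both RQ
locks are dropped is visible to, and waited out by, every path that
takes task_rq(p)->lock through the locking primitives patched below.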

2 files changed (+15, -3 lines)

kernel/sched/core.c

Lines changed: 9 additions & 3 deletions
@@ -333,9 +333,12 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 	for (;;) {
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
 	}
 }
 
@@ -352,10 +355,13 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
 		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
 	}
 }
 
@@ -1678,7 +1684,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);
 
-	if (task_on_rq_queued(p) && ttwu_remote(p, wake_flags))
+	if (p->on_rq && ttwu_remote(p, wake_flags))
 		goto stat;
 
 #ifdef CONFIG_SMP
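
Assembled from the first hunk above, __task_rq_lock() would read
roughly as follows with this patch applied (a sketch with comments
added here, not the verbatim file contents):

	static inline struct rq *__task_rq_lock(struct task_struct *p)
		__acquires(rq->lock)
	{
		struct rq *rq;

		for (;;) {
			rq = task_rq(p);
			raw_spin_lock(&rq->lock);
			/*
			 * Succeed only if p is still on this rq and is
			 * not in the middle of a migration.
			 */
			if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
				return rq;
			raw_spin_unlock(&rq->lock);

			/* Wait out the migration before retrying. */
			while (unlikely(task_on_rq_migrating(p)))
				cpu_relax();
		}
	}

The third hunk is the companion change: try_to_wake_up() now tests
p->on_rq rather than task_on_rq_queued(p), so a wakeup that observes
the transient TASK_ON_RQ_MIGRATING state still enters ttwu_remote(),
whose __task_rq_lock() call waits for the migration to complete.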

kernel/sched/sched.h

Lines changed: 6 additions & 0 deletions
@@ -17,6 +17,7 @@ struct rq;
 
 /* task_struct::on_rq states: */
 #define TASK_ON_RQ_QUEUED	1
+#define TASK_ON_RQ_MIGRATING	2
 
 extern __read_mostly int scheduler_running;
 
@@ -950,6 +951,11 @@ static inline int task_on_rq_queued(struct task_struct *p)
 	return p->on_rq == TASK_ON_RQ_QUEUED;
 }
 
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+	return p->on_rq == TASK_ON_RQ_MIGRATING;
+}
+
 #ifndef prepare_arch_switch
 # define prepare_arch_switch(next) do { } while (0)
 #endif
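
For reference, the p->on_rq values now form a small state machine; the
summary below is inferred from the commit message rather than taken
from the patch:

	/*
	 * p->on_rq state machine (sketch):
	 *
	 *   0                    -> task is on no runqueue (e.g. sleeping)
	 *   TASK_ON_RQ_QUEUED    -> task is enqueued on task_rq(p)
	 *   TASK_ON_RQ_MIGRATING -> task has been dequeued from the old rq
	 *                           but not yet enqueued on the new one;
	 *                           during this window task_rq_lock() and
	 *                           __task_rq_lock() spin
	 */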
