
Commit 1aaf90a

vingu-linaro authored and Ingo Molnar committed
sched: Move CFS tasks to CPUs with higher capacity
When a CPU is used to handle a lot of IRQs or some RT tasks, the remaining
capacity for CFS tasks can be significantly reduced. Once we detect such a
situation by comparing cpu_capacity_orig and cpu_capacity, we trigger an idle
load balance to check whether it's worth moving its tasks to an idle CPU. It's
worth trying to move the task before the CPU is fully utilized, to minimize
preemption by IRQs or RT tasks.

Once the idle load balance has selected the busiest CPU, it will look for an
active load balance in only two cases:

  - There is only 1 task on the busiest CPU.
  - We haven't been able to move a task off the busiest rq.

A CPU with reduced capacity is included in the 1st case, and it's worth
actively migrating its task if the idle CPU has more available capacity for
CFS tasks. This test has been added in need_active_balance.

As a sidenote, this will not generate more spurious idle load balances (ILBs),
because we already trigger an ILB if there is more than 1 busy CPU. If this
CPU is the only one that has a task, we will trigger the ILB once to migrate
the task.

The nohz_kick_needed function has been cleaned up a bit while adding the new
test.

env.src_cpu and env.src_rq must be set unconditionally because they are used
in need_active_balance, which is called even if busiest->nr_running equals 1.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Morten.Rasmussen@arm.com
Cc: dietmar.eggemann@arm.com
Cc: efault@gmx.de
Cc: kamalesh@linux.vnet.ibm.com
Cc: linaro-kernel@lists.linaro.org
Cc: nicolas.pitre@linaro.org
Cc: preeti@linux.vnet.ibm.com
Cc: riel@redhat.com
Link: http://lkml.kernel.org/r/1425052454-25797-12-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
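For context, the detection described above is done by the check_cpu_capacity()
helper that the first hunk below calls. Its body is not part of this diff; a
minimal sketch of the comparison, assuming the rq->cpu_capacity and
rq->cpu_capacity_orig fields named in the message:

/*
 * Sketch only: check_cpu_capacity() is used by this patch but defined
 * elsewhere in kernel/sched/fair.c. It returns true when
 * cpu_capacity * imbalance_pct < cpu_capacity_orig * 100, i.e. when the
 * capacity left for CFS has dropped significantly below the CPU's
 * original capacity because of IRQ or RT pressure.
 */
static inline int check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
{
	return ((rq->cpu_capacity * sd->imbalance_pct) <
				(rq->cpu_capacity_orig * 100));
}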
1 parent caff37e commit 1aaf90a

File tree

1 file changed: +47 −22 lines

kernel/sched/fair.c

Lines changed: 47 additions & 22 deletions
@@ -6855,6 +6855,19 @@ static int need_active_balance(struct lb_env *env)
 		return 1;
 	}
 
+	/*
+	 * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
+	 * It's worth migrating the task if the src_cpu's capacity is reduced
+	 * because of other sched_class or IRQs if more capacity stays
+	 * available on dst_cpu.
+	 */
+	if ((env->idle != CPU_NOT_IDLE) &&
+	    (env->src_rq->cfs.h_nr_running == 1)) {
+		if ((check_cpu_capacity(env->src_rq, sd)) &&
+		    (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
+			return 1;
+	}
+
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
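To make the new test concrete: with the common sched_domain default of
imbalance_pct = 125, the second comparison only passes when dst_cpu offers at
least 25% more CFS capacity than what is left on src_cpu. A standalone sketch
of the arithmetic (the capacity values are illustrative, not taken from the
patch):

#include <stdio.h>

/* Illustrative numbers only: 125 is a common sched_domain imbalance_pct
 * default; the capacities stand in for capacity_of() on each CPU. */
static int worth_active_balance(unsigned long src_cap, unsigned long dst_cap,
				unsigned long imbalance_pct)
{
	/* Same comparison as the new test in need_active_balance() above. */
	return src_cap * imbalance_pct < dst_cap * 100;
}

int main(void)
{
	/* src squeezed to ~40% by IRQ/RT work vs. a fully available dst. */
	printf("src=430 dst=1024 -> %d\n", worth_active_balance(430, 1024, 125));
	/* src barely reduced: migration would not gain enough. */
	printf("src=900 dst=1024 -> %d\n", worth_active_balance(900, 1024, 125));
	return 0;
}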

@@ -6954,6 +6967,9 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 
 	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
+	env.src_cpu = busiest->cpu;
+	env.src_rq = busiest;
+
 	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/*
@@ -6963,8 +6979,6 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 		 * correctly treated as an imbalance.
 		 */
 		env.flags |= LBF_ALL_PINNED;
-		env.src_cpu = busiest->cpu;
-		env.src_rq = busiest;
 		env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
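The two hunks above hoist env.src_cpu/env.src_rq out of the
busiest->nr_running > 1 branch because the failure path now consults them for
every busiest rq. A heavily condensed sketch of the resulting order in
load_balance(), with unrelated code elided:

	/* After find_busiest_queue() has picked 'busiest': */
	env.src_cpu = busiest->cpu;	/* set unconditionally now ...      */
	env.src_rq = busiest;		/* ... not only when nr_running > 1 */

	if (busiest->nr_running > 1) {
		/* ... detach/attach tasks, may set ld_moved ... */
	}

	if (!ld_moved) {
		/*
		 * need_active_balance() dereferences env.src_rq even when
		 * busiest had a single task, hence the hoist above.
		 */
		if (need_active_balance(&env)) {
			/* ... kick the active load balance ... */
		}
	}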
@@ -7664,22 +7678,25 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 
 /*
  * Current heuristic for kicking the idle load balancer in the presence
- * of an idle cpu is the system.
+ * of an idle cpu in the system.
  *   - This rq has more than one task.
- *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's capacity.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpu.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline int nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
 	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
+	bool kick = false;
 
 	if (unlikely(rq->idle_balance))
-		return 0;
+		return false;
 
 	/*
 	 * We may be recently in ticked or tickless idle mode. At the first
@@ -7693,38 +7710,46 @@ static inline int nohz_kick_needed(struct rq *rq)
 	 * balancing.
 	 */
 	if (likely(!atomic_read(&nohz.nr_cpus)))
-		return 0;
+		return false;
 
 	if (time_before(now, nohz.next_balance))
-		return 0;
+		return false;
 
 	if (rq->nr_running >= 2)
-		goto need_kick;
+		return true;
 
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
-
 	if (sd) {
 		sgc = sd->groups->sgc;
 		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
-		if (nr_busy > 1)
-			goto need_kick_unlock;
+		if (nr_busy > 1) {
+			kick = true;
+			goto unlock;
+		}
+
 	}
 
-	sd = rcu_dereference(per_cpu(sd_asym, cpu));
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		if ((rq->cfs.h_nr_running >= 1) &&
+		    check_cpu_capacity(rq, sd)) {
+			kick = true;
+			goto unlock;
+		}
+	}
 
+	sd = rcu_dereference(per_cpu(sd_asym, cpu));
 	if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-				  sched_domain_span(sd)) < cpu))
-		goto need_kick_unlock;
-
-	rcu_read_unlock();
-	return 0;
+				  sched_domain_span(sd)) < cpu)) {
+		kick = true;
+		goto unlock;
+	}
 
-need_kick_unlock:
+unlock:
 	rcu_read_unlock();
-need_kick:
-	return 1;
+	return kick;
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
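For completeness, the bool returned by the reworked nohz_kick_needed() is
consumed from the scheduler tick. A condensed sketch of that caller,
paraphrased from the same file and era of kernel/sched/fair.c (not part of
this diff):

/*
 * Condensed sketch of trigger_load_balance(), which runs from
 * scheduler_tick(): a true result from nohz_kick_needed() asks an idle
 * CPU to run the idle load balance on our behalf.
 */
void trigger_load_balance(struct rq *rq)
{
	/* Don't need to rebalance while attached to NULL domain */
	if (unlikely(on_null_domain(rq)))
		return;

	if (time_after_eq(jiffies, rq->next_balance))
		raise_softirq(SCHED_SOFTIRQ);
#ifdef CONFIG_NO_HZ_COMMON
	if (nohz_kick_needed(rq))
		nohz_balancer_kick();
#endif
}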
