Skip to content

Commit 37dc6b5

Browse files
Preeti U Murthy authored and Ingo Molnar committed
sched: Remove unnecessary iteration over sched domains to update nr_busy_cpus
The nr_busy_cpus parameter is used by nohz_kick_needed() to find out the number of busy cpus in a sched domain which has the SD_SHARE_PKG_RESOURCES flag set. Therefore, instead of updating nr_busy_cpus at every level of sched domain, since it is irrelevant, we can update this parameter only at the parent domain of the sd which has this flag set. Introduce a per-cpu parameter sd_busy which represents this parent domain.

In nohz_kick_needed() we directly query the nr_busy_cpus parameter associated with the groups of sd_busy.

By associating sd_busy with the highest domain which has the SD_SHARE_PKG_RESOURCES flag set, we cover all lower level domains which could have this flag set and trigger nohz_idle_balancing if any of the levels have more than one busy cpu.

sd_busy is irrelevant for asymmetric load balancing. However, sd_asym has been introduced to represent the highest sched domain which has the SD_ASYM_PACKING flag set so that it can be queried directly when required.

While we are at it, we might as well change the nohz_idle parameter to be updated at the sd_busy domain level alone and not the base domain level of a CPU. This will unify the concept of busy cpus at just one level of sched domain where it is currently used.

Signed-off-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: svaidy@linux.vnet.ibm.com
Cc: vincent.guittot@linaro.org
Cc: bitbucket@online.de
Cc: benh@kernel.crashing.org
Cc: anton@samba.org
Cc: Morten.Rasmussen@arm.com
Cc: pjt@google.com
Cc: peterz@infradead.org
Cc: mikey@neuling.org
Link: http://lkml.kernel.org/r/20131030031252.23426.4417.stgit@preeti.in.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 2042abe commit 37dc6b5

File tree

3 files changed

+28
-18
lines changed

3 files changed

+28
-18
lines changed

kernel/sched/core.c

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4883,6 +4883,8 @@ DEFINE_PER_CPU(struct sched_domain *, sd_llc);
48834883
DEFINE_PER_CPU(int, sd_llc_size);
48844884
DEFINE_PER_CPU(int, sd_llc_id);
48854885
DEFINE_PER_CPU(struct sched_domain *, sd_numa);
4886+
DEFINE_PER_CPU(struct sched_domain *, sd_busy);
4887+
DEFINE_PER_CPU(struct sched_domain *, sd_asym);
48864888

48874889
static void update_top_cache_domain(int cpu)
48884890
{
@@ -4894,6 +4896,7 @@ static void update_top_cache_domain(int cpu)
48944896
if (sd) {
48954897
id = cpumask_first(sched_domain_span(sd));
48964898
size = cpumask_weight(sched_domain_span(sd));
4899+
rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
48974900
}
48984901

48994902
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
@@ -4902,6 +4905,9 @@ static void update_top_cache_domain(int cpu)
49024905

49034906
sd = lowest_flag_domain(cpu, SD_NUMA);
49044907
rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
4908+
4909+
sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
4910+
rcu_assign_pointer(per_cpu(sd_asym, cpu), sd);
49054911
}
49064912

49074913
/*

kernel/sched/fair.c

Lines changed: 20 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6534,33 +6534,33 @@ static inline void nohz_balance_exit_idle(int cpu)
65346534
static inline void set_cpu_sd_state_busy(void)
65356535
{
65366536
struct sched_domain *sd;
6537+
int cpu = smp_processor_id();
65376538

65386539
rcu_read_lock();
6539-
sd = rcu_dereference_check_sched_domain(this_rq()->sd);
6540+
sd = rcu_dereference(per_cpu(sd_busy, cpu));
65406541

65416542
if (!sd || !sd->nohz_idle)
65426543
goto unlock;
65436544
sd->nohz_idle = 0;
65446545

6545-
for (; sd; sd = sd->parent)
6546-
atomic_inc(&sd->groups->sgp->nr_busy_cpus);
6546+
atomic_inc(&sd->groups->sgp->nr_busy_cpus);
65476547
unlock:
65486548
rcu_read_unlock();
65496549
}
65506550

65516551
void set_cpu_sd_state_idle(void)
65526552
{
65536553
struct sched_domain *sd;
6554+
int cpu = smp_processor_id();
65546555

65556556
rcu_read_lock();
6556-
sd = rcu_dereference_check_sched_domain(this_rq()->sd);
6557+
sd = rcu_dereference(per_cpu(sd_busy, cpu));
65576558

65586559
if (!sd || sd->nohz_idle)
65596560
goto unlock;
65606561
sd->nohz_idle = 1;
65616562

6562-
for (; sd; sd = sd->parent)
6563-
atomic_dec(&sd->groups->sgp->nr_busy_cpus);
6563+
atomic_dec(&sd->groups->sgp->nr_busy_cpus);
65646564
unlock:
65656565
rcu_read_unlock();
65666566
}
@@ -6767,6 +6767,8 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
67676767
{
67686768
unsigned long now = jiffies;
67696769
struct sched_domain *sd;
6770+
struct sched_group_power *sgp;
6771+
int nr_busy;
67706772

67716773
if (unlikely(idle_cpu(cpu)))
67726774
return 0;
@@ -6792,22 +6794,22 @@ static inline int nohz_kick_needed(struct rq *rq, int cpu)
67926794
goto need_kick;
67936795

67946796
rcu_read_lock();
6795-
for_each_domain(cpu, sd) {
6796-
struct sched_group *sg = sd->groups;
6797-
struct sched_group_power *sgp = sg->sgp;
6798-
int nr_busy = atomic_read(&sgp->nr_busy_cpus);
6797+
sd = rcu_dereference(per_cpu(sd_busy, cpu));
67996798

6800-
if (sd->flags & SD_SHARE_PKG_RESOURCES && nr_busy > 1)
6801-
goto need_kick_unlock;
6799+
if (sd) {
6800+
sgp = sd->groups->sgp;
6801+
nr_busy = atomic_read(&sgp->nr_busy_cpus);
68026802

6803-
if (sd->flags & SD_ASYM_PACKING
6804-
&& (cpumask_first_and(nohz.idle_cpus_mask,
6805-
sched_domain_span(sd)) < cpu))
6803+
if (nr_busy > 1)
68066804
goto need_kick_unlock;
6807-
6808-
if (!(sd->flags & (SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING)))
6809-
break;
68106805
}
6806+
6807+
sd = rcu_dereference(per_cpu(sd_asym, cpu));
6808+
6809+
if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
6810+
sched_domain_span(sd)) < cpu))
6811+
goto need_kick_unlock;
6812+
68116813
rcu_read_unlock();
68126814
return 0;
68136815

kernel/sched/sched.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -623,6 +623,8 @@ DECLARE_PER_CPU(struct sched_domain *, sd_llc);
623623
DECLARE_PER_CPU(int, sd_llc_size);
624624
DECLARE_PER_CPU(int, sd_llc_id);
625625
DECLARE_PER_CPU(struct sched_domain *, sd_numa);
626+
DECLARE_PER_CPU(struct sched_domain *, sd_busy);
627+
DECLARE_PER_CPU(struct sched_domain *, sd_asym);
626628

627629
struct sched_group_power {
628630
atomic_t ref;

0 commit comments

Comments
 (0)