
Commit f643ea2

vingu-linaro authored and Ingo Molnar committed
sched/nohz: Stop NOHZ stats when decayed
Stop the periodic update of blocked load when all idle CPUs have fully decayed. We introduce a new nohz.has_blocked flag that reflects whether some idle CPUs have blocked load that has to be periodically updated. nohz.has_blocked is set every time an idle CPU can have blocked load, and it is cleared when no more blocked load has been detected during an update. We don't need atomic operations, only to make sure of the right ordering when updating nohz.idle_cpus_mask and nohz.has_blocked.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: brendan.jackman@arm.com
Cc: dietmar.eggemann@arm.com
Cc: morten.rasmussen@foss.arm.com
Cc: valentin.schneider@arm.com
Link: http://lkml.kernel.org/r/1518517879-2280-2-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent ea14b57 commit f643ea2
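The ordering argument in the commit message (set the CPU's bit in nohz.idle_cpus_mask, full barrier, then set nohz.has_blocked on one side; clear nohz.has_blocked, full barrier, then walk the mask on the other) can be modelled outside the kernel. Below is a minimal, hypothetical userspace sketch using C11 atomics and pthreads; enter_idle(), idle_balance() and the two globals are illustrative stand-ins for nohz_balance_enter_idle(), nohz_idle_balance() and the nohz fields, not kernel code.

/*
 * Hypothetical userspace model of the ordering the patch relies on.
 * idle_cpus_mask / has_blocked stand in for nohz.idle_cpus_mask and
 * nohz.has_blocked; the fences mirror smp_mb__after_atomic() in
 * nohz_balance_enter_idle() and smp_mb() in nohz_idle_balance().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_long idle_cpus_mask;      /* bit N set => CPU N is NOHZ idle */
static atomic_int  has_blocked;         /* some idle CPU may carry blocked load */

static void *enter_idle(void *arg)      /* models nohz_balance_enter_idle() */
{
        long cpu = *(long *)arg;

        atomic_fetch_or(&idle_cpus_mask, 1L << cpu);    /* publish the idle CPU */
        atomic_thread_fence(memory_order_seq_cst);      /* smp_mb__after_atomic() */
        atomic_store(&has_blocked, 1);                  /* it may have blocked load */
        return NULL;
}

static void *idle_balance(void *arg)    /* models nohz_idle_balance() */
{
        (void)arg;

        atomic_store(&has_blocked, 0);                  /* assume everything will decay */
        atomic_thread_fence(memory_order_seq_cst);      /* smp_mb() */

        long mask = atomic_load(&idle_cpus_mask);
        for (long cpu = 0; cpu < 64; cpu++) {
                if (mask & (1L << cpu))
                        printf("updating blocked load of CPU %ld\n", cpu);
        }
        return NULL;
}

int main(void)
{
        static long cpu = 3;
        pthread_t a, b;

        pthread_create(&a, NULL, enter_idle, &cpu);
        pthread_create(&b, NULL, idle_balance, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);

        /*
         * Guarantee provided by the two barriers: if idle_balance() missed
         * CPU 3 in the mask, then its clear of has_blocked is ordered before
         * enter_idle()'s store, so has_blocked ends up 1 and a later update
         * pass will still visit the CPU.
         */
        printf("final has_blocked = %d\n", atomic_load(&has_blocked));
        return 0;
}

Built with "cc -std=c11 -pthread", either the updater sees CPU 3 in the mask and refreshes it, or has_blocked ends up set and the next periodic kick picks the CPU up; those two outcomes are exactly what the barriers in the patch are there to guarantee.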

File tree

2 files changed: +97 -20 lines changed


kernel/sched/fair.c

Lines changed: 96 additions & 20 deletions
@@ -5387,8 +5387,9 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
 static struct {
         cpumask_var_t idle_cpus_mask;
         atomic_t nr_cpus;
+        int has_blocked;                /* Idle CPUS has blocked load */
         unsigned long next_balance;     /* in jiffy units */
-        unsigned long next_stats;
+        unsigned long next_blocked;     /* Next update of blocked load in jiffies */
 } nohz ____cacheline_aligned;
 
 #endif /* CONFIG_NO_HZ_COMMON */
@@ -7038,6 +7039,7 @@ enum fbq_type { regular, remote, all };
 #define LBF_DST_PINNED  0x04
 #define LBF_SOME_PINNED 0x08
 #define LBF_NOHZ_STATS  0x10
+#define LBF_NOHZ_AGAIN  0x20
 
 struct lb_env {
         struct sched_domain *sd;
@@ -7422,8 +7424,6 @@ static void attach_tasks(struct lb_env *env)
         rq_unlock(env->dst_rq, &rf);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 {
         if (cfs_rq->load.weight)
@@ -7441,11 +7441,14 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
         return true;
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
 static void update_blocked_averages(int cpu)
 {
         struct rq *rq = cpu_rq(cpu);
         struct cfs_rq *cfs_rq, *pos;
         struct rq_flags rf;
+        bool done = true;
 
         rq_lock_irqsave(rq, &rf);
         update_rq_clock(rq);
@@ -7475,10 +7478,14 @@ static void update_blocked_averages(int cpu)
                  */
                 if (cfs_rq_is_decayed(cfs_rq))
                         list_del_leaf_cfs_rq(cfs_rq);
+                else
+                        done = false;
         }
 
 #ifdef CONFIG_NO_HZ_COMMON
         rq->last_blocked_load_update_tick = jiffies;
+        if (done)
+                rq->has_blocked_load = 0;
 #endif
         rq_unlock_irqrestore(rq, &rf);
 }
@@ -7541,6 +7548,8 @@ static inline void update_blocked_averages(int cpu)
         update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
 #ifdef CONFIG_NO_HZ_COMMON
         rq->last_blocked_load_update_tick = jiffies;
+        if (cfs_rq_is_decayed(cfs_rq))
+                rq->has_blocked_load = 0;
 #endif
         rq_unlock_irqrestore(rq, &rf);
 }
@@ -7876,18 +7885,25 @@ group_type group_classify(struct sched_group *group,
         return group_other;
 }
 
-static void update_nohz_stats(struct rq *rq)
+static bool update_nohz_stats(struct rq *rq)
 {
 #ifdef CONFIG_NO_HZ_COMMON
         unsigned int cpu = rq->cpu;
 
+        if (!rq->has_blocked_load)
+                return false;
+
         if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
-                return;
+                return false;
 
         if (!time_after(jiffies, rq->last_blocked_load_update_tick))
-                return;
+                return true;
 
         update_blocked_averages(cpu);
+
+        return rq->has_blocked_load;
+#else
+        return false;
 #endif
 }
 
@@ -7913,8 +7929,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
         for_each_cpu_and(i, sched_group_span(group), env->cpus) {
                 struct rq *rq = cpu_rq(i);
 
-                if (env->flags & LBF_NOHZ_STATS)
-                        update_nohz_stats(rq);
+                if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq))
+                        env->flags |= LBF_NOHZ_AGAIN;
 
                 /* Bias balancing toward CPUs of our domain: */
                 if (local_group)
@@ -8072,12 +8088,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
                 prefer_sibling = 1;
 
 #ifdef CONFIG_NO_HZ_COMMON
-        if (env->idle == CPU_NEWLY_IDLE) {
+        if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
                 env->flags |= LBF_NOHZ_STATS;
-
-                if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
-                        nohz.next_stats = jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD);
-        }
 #endif
 
         load_idx = get_sd_load_idx(env->sd, env->idle);
@@ -8133,6 +8145,15 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
                 sg = sg->next;
         } while (sg != env->sd->groups);
 
+#ifdef CONFIG_NO_HZ_COMMON
+        if ((env->flags & LBF_NOHZ_AGAIN) &&
+            cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
+
+                WRITE_ONCE(nohz.next_blocked,
+                                jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
+        }
+#endif
+
         if (env->sd->flags & SD_NUMA)
                 env->fbq_type = fbq_classify_group(&sds->busiest_stat);
 
@@ -9174,7 +9195,8 @@ static void nohz_balancer_kick(struct rq *rq)
         if (likely(!atomic_read(&nohz.nr_cpus)))
                 return;
 
-        if (time_after(now, nohz.next_stats))
+        if (READ_ONCE(nohz.has_blocked) &&
+            time_after(now, READ_ONCE(nohz.next_blocked)))
                 flags = NOHZ_STATS_KICK;
 
         if (time_before(now, nohz.next_balance))
@@ -9293,8 +9315,21 @@ void nohz_balance_enter_idle(int cpu)
         if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
                 return;
 
+        /*
+         * Can be set safely without rq->lock held
+         * If a clear happens, it will have evaluated last additions because
+         * rq->lock is held during the check and the clear
+         */
+        rq->has_blocked_load = 1;
+
+        /*
+         * The tick is still stopped but load could have been added in the
+         * meantime. We set the nohz.has_blocked flag to trig a check of the
+         * *_avg. The CPU is already part of nohz.idle_cpus_mask so the clear
+         * of nohz.has_blocked can only happen after checking the new load
+         */
         if (rq->nohz_tick_stopped)
-                return;
+                goto out;
 
         /* If we're a completely isolated CPU, we don't play: */
         if (on_null_domain(rq))
@@ -9305,7 +9340,21 @@ void nohz_balance_enter_idle(int cpu)
         cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
         atomic_inc(&nohz.nr_cpus);
 
+        /*
+         * Ensures that if nohz_idle_balance() fails to observe our
+         * @idle_cpus_mask store, it must observe the @has_blocked
+         * store.
+         */
+        smp_mb__after_atomic();
+
         set_cpu_sd_state_idle(cpu);
+
+out:
+        /*
+         * Each time a cpu enter idle, we assume that it has blocked load and
+         * enable the periodic update of the load of idle cpus
+         */
+        WRITE_ONCE(nohz.has_blocked, 1);
 }
 #else
 static inline void nohz_balancer_kick(struct rq *rq) { }
@@ -9439,7 +9488,7 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
         /* Earliest time when we have to do rebalance again */
         unsigned long now = jiffies;
         unsigned long next_balance = now + 60*HZ;
-        unsigned long next_stats = now + msecs_to_jiffies(LOAD_AVG_PERIOD);
+        bool has_blocked_load = false;
         int update_next_balance = 0;
         int this_cpu = this_rq->cpu;
         unsigned int flags;
@@ -9458,6 +9507,22 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 
         SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
 
+        /*
+         * We assume there will be no idle load after this update and clear
+         * the has_blocked flag. If a cpu enters idle in the mean time, it will
+         * set the has_blocked flag and trig another update of idle load.
+         * Because a cpu that becomes idle, is added to idle_cpus_mask before
+         * setting the flag, we are sure to not clear the state and not
+         * check the load of an idle cpu.
+         */
+        WRITE_ONCE(nohz.has_blocked, 0);
+
+        /*
+         * Ensures that if we miss the CPU, we must see the has_blocked
+         * store from nohz_balance_enter_idle().
+         */
+        smp_mb();
+
         for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
                 if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
                         continue;
@@ -9467,11 +9532,16 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
                  * work being done for other CPUs. Next load
                  * balancing owner will pick it up.
                  */
-                if (need_resched())
-                        break;
+                if (need_resched()) {
+                        has_blocked_load = true;
+                        goto abort;
+                }
 
                 rq = cpu_rq(balance_cpu);
 
+                update_blocked_averages(rq->cpu);
+                has_blocked_load |= rq->has_blocked_load;
+
                 /*
                  * If time for next balance is due,
                  * do the balance.
@@ -9484,7 +9554,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
                         cpu_load_update_idle(rq);
                         rq_unlock_irq(rq, &rf);
 
-                        update_blocked_averages(rq->cpu);
                         if (flags & NOHZ_BALANCE_KICK)
                                 rebalance_domains(rq, CPU_IDLE);
                 }
@@ -9499,7 +9568,13 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
         if (flags & NOHZ_BALANCE_KICK)
                 rebalance_domains(this_rq, CPU_IDLE);
 
-        nohz.next_stats = next_stats;
+        WRITE_ONCE(nohz.next_blocked,
+                now + msecs_to_jiffies(LOAD_AVG_PERIOD));
+
+abort:
+        /* There is still blocked load, enable periodic update */
+        if (has_blocked_load)
+                WRITE_ONCE(nohz.has_blocked, 1);
 
         /*
          * next_balance will be updated only when there is a need.
@@ -10135,6 +10210,7 @@ __init void init_sched_fair_class(void)
 
 #ifdef CONFIG_NO_HZ_COMMON
         nohz.next_balance = jiffies;
+        nohz.next_blocked = jiffies;
         zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
 #endif
 #endif /* SMP */

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
@@ -763,6 +763,7 @@ struct rq {
 #ifdef CONFIG_SMP
         unsigned long last_load_update_tick;
         unsigned long last_blocked_load_update_tick;
+        unsigned int has_blocked_load;
 #endif /* CONFIG_SMP */
         unsigned int nohz_tick_stopped;
         atomic_t nohz_flags;
