Commit 231c807

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Thomas Gleixner:
 "Third more careful attempt for this set of fixes:

   - Prevent a 32bit math overflow in the cpufreq code

   - Fix a buffer overflow when scanning the cgroup2 cpu.max property

   - A set of fixes for the NOHZ scheduler logic to prevent waking up
     CPUs even if the capacity of the busy CPUs is sufficient, along with
     other tweaks optimizing the behaviour for asymmetric systems
     (big/little)"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Skip LLC NOHZ logic for asymmetric systems
  sched/fair: Tune down misfit NOHZ kicks
  sched/fair: Comment some nohz_balancer_kick() kick conditions
  sched/core: Fix buffer overflow in cgroup2 property cpu.max
  sched/cpufreq: Fix 32-bit math overflow
2 parents 49ef015 + b9a7b88 commit 231c807

3 files changed: 89 additions & 56 deletions


kernel/sched/core.c

Lines changed: 1 addition & 1 deletion
@@ -6998,7 +6998,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
 {
 	char tok[21];	/* U64_MAX */
 
-	if (!sscanf(buf, "%s %llu", tok, periodp))
+	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
 		return -EINVAL;
 
 	*periodp *= NSEC_PER_USEC;
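The width specifier is the whole fix: tok holds 21 bytes (the 20 digits of U64_MAX plus the terminating NUL), so an unbounded "%s" lets a longer token overrun it, while "%20s" cannot; requiring at least one conversion still accepts a bare "max". A minimal user-space sketch of the same sscanf() pattern, with illustrative inputs rather than kernel code:

```c
#include <stdio.h>

int main(void)
{
	char tok[21];			/* 20 digits of U64_MAX + '\0' */
	unsigned long long period;
	const char *inputs[] = { "max", "max 100000", "50000 100000" };

	for (int i = 0; i < 3; i++) {
		/* "%20s" stops after 20 bytes, so a long token cannot overflow tok.
		 * "max" yields one conversion, "max 100000" yields two. */
		int n = sscanf(inputs[i], "%20s %llu", tok, &period);

		if (n < 1) {		/* mirrors the -EINVAL check in the patch */
			printf("invalid: %s\n", inputs[i]);
			continue;
		}
		printf("tok=%s matched=%d\n", tok, n);
	}
	return 0;
}
```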

kernel/sched/cpufreq_schedutil.c

Lines changed: 25 additions & 34 deletions
@@ -48,10 +48,10 @@ struct sugov_cpu {
 
 	bool			iowait_boost_pending;
 	unsigned int		iowait_boost;
-	unsigned int		iowait_boost_max;
 	u64			last_update;
 
 	unsigned long		bw_dl;
+	unsigned long		min;
 	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
@@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
 	if (delta_ns <= TICK_NSEC)
 		return false;
 
-	sg_cpu->iowait_boost = set_iowait_boost
-				? sg_cpu->sg_policy->policy->min : 0;
+	sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
 	sg_cpu->iowait_boost_pending = set_iowait_boost;
 
 	return true;
@@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
 
 	/* Double the boost at each request */
 	if (sg_cpu->iowait_boost) {
-		sg_cpu->iowait_boost <<= 1;
-		if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-			sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		sg_cpu->iowait_boost =
+			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
 		return;
 	}
 
 	/* First wakeup after IO: start with minimum boost */
-	sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+	sg_cpu->iowait_boost = sg_cpu->min;
 }
 
 /**
@@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  * This mechanism is designed to boost high frequently IO waiting tasks, while
  * being more conservative on tasks which does sporadic IO operations.
  */
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+					unsigned long util, unsigned long max)
 {
-	unsigned int boost_util, boost_max;
+	unsigned long boost;
 
 	/* No boost currently required */
 	if (!sg_cpu->iowait_boost)
-		return;
+		return util;
 
 	/* Reset boost if the CPU appears to have been idle enough */
 	if (sugov_iowait_reset(sg_cpu, time, false))
-		return;
+		return util;
 
-	/*
-	 * An IO waiting task has just woken up:
-	 * allow to further double the boost value
-	 */
-	if (sg_cpu->iowait_boost_pending) {
-		sg_cpu->iowait_boost_pending = false;
-	} else {
+	if (!sg_cpu->iowait_boost_pending) {
 		/*
-		 * Otherwise: reduce the boost value and disable it when we
-		 * reach the minimum.
+		 * No boost pending; reduce the boost value.
 		 */
 		sg_cpu->iowait_boost >>= 1;
-		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+		if (sg_cpu->iowait_boost < sg_cpu->min) {
 			sg_cpu->iowait_boost = 0;
-			return;
+			return util;
 		}
 	}
 
+	sg_cpu->iowait_boost_pending = false;
+
 	/*
-	 * Apply the current boost value: a CPU is boosted only if its current
-	 * utilization is smaller then the current IO boost level.
+	 * @util is already in capacity scale; convert iowait_boost
+	 * into the same scale so we can compare.
 	 */
-	boost_util = sg_cpu->iowait_boost;
-	boost_max = sg_cpu->iowait_boost_max;
-	if (*util * boost_max < *max * boost_util) {
-		*util = boost_util;
-		*max = boost_max;
-	}
+	boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+	return max(boost, util);
 }
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 
 	util = sugov_get_util(sg_cpu);
 	max = sg_cpu->max;
-	sugov_iowait_apply(sg_cpu, time, &util, &max);
+	util = sugov_iowait_apply(sg_cpu, time, util, max);
 	next_f = get_next_freq(sg_policy, util, max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
@@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
 		j_util = sugov_get_util(j_sg_cpu);
 		j_max = j_sg_cpu->max;
-		sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+		j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
 		if (j_util * max > j_max * util) {
 			util = j_util;
@@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy)
 		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->cpu			= cpu;
 		sg_cpu->sg_policy		= sg_policy;
-		sg_cpu->iowait_boost_max	= policy->cpuinfo.max_freq;
+		sg_cpu->min			=
+			(SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+			policy->cpuinfo.max_freq;
 	}
 
 	for_each_cpu(cpu, policy->cpus) {
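The rework keeps iowait_boost in capacity units, between sg_cpu->min and SCHED_CAPACITY_SCALE, instead of raw frequencies, so the old cross-multiplication `*util * boost_max < *max * boost_util`, which could overflow 32-bit arithmetic, disappears. A rough user-space sketch of the new arithmetic with made-up numbers (a 500 MHz / 2 GHz policy and a CPU of capacity 446); nothing below is the kernel implementation itself:

```c
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Same shape as the patched sugov_iowait_apply(): boost lives in capacity units. */
static unsigned long iowait_apply(unsigned long iowait_boost,
				  unsigned long util, unsigned long max)
{
	/* Scale the boost by the CPU's max capacity, then take the larger value. */
	unsigned long boost = (iowait_boost * max) >> SCHED_CAPACITY_SHIFT;

	return boost > util ? boost : util;
}

int main(void)
{
	/* e.g. min_freq = 500 MHz, max_freq = 2000 MHz -> minimum boost of 256 */
	unsigned long min  = (SCHED_CAPACITY_SCALE * 500) / 2000;
	unsigned long max  = 446;	/* capacity of a little CPU, for example */
	unsigned long util = 100;

	/* The boost doubles per iowait wakeup and is capped at SCHED_CAPACITY_SCALE. */
	for (unsigned long boost = min; boost <= SCHED_CAPACITY_SCALE; boost <<= 1)
		printf("boost=%4lu -> effective util=%4lu\n",
		       boost, iowait_apply(boost, util, max));

	return 0;
}
```

With these numbers the boost starts at 256 and the effective utilization saturates at 446 once the scaled boost reaches the CPU's own capacity.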

kernel/sched/fair.c

Lines changed: 63 additions & 21 deletions
@@ -8059,6 +8059,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 			       (rq->cpu_capacity_orig * 100));
 }
 
+/*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+	return rq->misfit_task_load &&
+		(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+		 check_cpu_capacity(rq, sd));
+}
+
 /*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to ->cpus_allowed constraints.
@@ -9586,49 +9598,79 @@ static void nohz_balancer_kick(struct rq *rq)
 	if (time_before(now, nohz.next_balance))
 		goto out;
 
-	if (rq->nr_running >= 2 || rq->misfit_task_load) {
+	if (rq->nr_running >= 2) {
 		flags = NOHZ_KICK_MASK;
 		goto out;
 	}
 
 	rcu_read_lock();
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
-		/*
-		 * If there is an imbalance between LLC domains (IOW we could
-		 * increase the overall cache use), we need some less-loaded LLC
-		 * domain to pull some load. Likewise, we may need to spread
-		 * load within the current LLC domain (e.g. packed SMT cores but
-		 * other CPUs are idle). We can't really know from here how busy
-		 * the others are - so just get a nohz balance going if it looks
-		 * like this LLC domain has tasks we could move.
-		 */
-		nr_busy = atomic_read(&sds->nr_busy_cpus);
-		if (nr_busy > 1) {
-			flags = NOHZ_KICK_MASK;
-			goto unlock;
-		}
-
-	}
 
 	sd = rcu_dereference(rq->sd);
 	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-		    check_cpu_capacity(rq, sd)) {
+		/*
+		 * If there's a CFS task and the current CPU has reduced
+		 * capacity; kick the ILB to see if there's a better CPU to run
+		 * on.
+		 */
+		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
 			flags = NOHZ_KICK_MASK;
 			goto unlock;
 		}
 	}
 
 	sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
 	if (sd) {
+		/*
+		 * When ASYM_PACKING; see if there's a more preferred CPU
+		 * currently idle; in which case, kick the ILB to move tasks
+		 * around.
+		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
 			if (sched_asym_prefer(i, cpu)) {
 				flags = NOHZ_KICK_MASK;
 				goto unlock;
 			}
 		}
 	}
+
+	sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+	if (sd) {
+		/*
+		 * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+		 * to run the misfit task on.
+		 */
+		if (check_misfit_status(rq, sd)) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+
+		/*
+		 * For asymmetric systems, we do not want to nicely balance
+		 * cache use, instead we want to embrace asymmetry and only
+		 * ensure tasks have enough CPU capacity.
+		 *
+		 * Skip the LLC logic because it's not relevant in that case.
+		 */
+		goto unlock;
+	}
+
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * If there is an imbalance between LLC domains (IOW we could
+		 * increase the overall cache use), we need some less-loaded LLC
+		 * domain to pull some load. Likewise, we may need to spread
+		 * load within the current LLC domain (e.g. packed SMT cores but
+		 * other CPUs are idle). We can't really know from here how busy
+		 * the others are - so just get a nohz balance going if it looks
+		 * like this LLC domain has tasks we could move.
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
+		if (nr_busy > 1) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+	}
 unlock:
 	rcu_read_unlock();
 out:
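To see when the new check_misfit_status() condition actually fires, here is a self-contained toy version of the predicate. The struct, the capacity numbers, and the 125 used for sd->imbalance_pct (a common default) are illustrative only, not the kernel's data structures:

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the runqueue fields the predicate looks at. */
struct toy_rq {
	unsigned long misfit_task_load;	 /* non-zero if a task outgrew this CPU  */
	unsigned long cpu_capacity_orig; /* full capacity of this CPU            */
	unsigned long cpu_capacity;	 /* capacity left after RT/IRQ pressure  */
	unsigned long max_cpu_capacity;	 /* biggest CPU in the root domain       */
};

/* Rough equivalent of check_cpu_capacity(), using 125 as the imbalance percentage. */
static bool reduced_capacity(const struct toy_rq *rq)
{
	return rq->cpu_capacity * 125 < rq->cpu_capacity_orig * 100;
}

/* Mirrors the shape of check_misfit_status(): only kick if we can actually help. */
static bool misfit_status(const struct toy_rq *rq)
{
	return rq->misfit_task_load &&
	       (rq->cpu_capacity_orig < rq->max_cpu_capacity ||
		reduced_capacity(rq));
}

int main(void)
{
	/* Misfit task on a little CPU: a bigger CPU exists, so kick the ILB. */
	struct toy_rq little = { 400, 446, 446, 1024 };
	/* Misfit task already on the biggest CPU and not pressured: no kick. */
	struct toy_rq big = { 900, 1024, 1024, 1024 };

	printf("little kicks ILB: %d\n", misfit_status(&little));
	printf("big kicks ILB:    %d\n", misfit_status(&big));
	return 0;
}
```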
