Commit 4550487

Peter Zijlstra authored and Ingo Molnar committed
sched/fair: Restructure nohz_balance_kick()
The current:

	if (nohz_kick_needed())
		nohz_balancer_kick()

construct is pointless complexity; fold the two into a single call and avoid
the various conditions at the call site.

When we introduce multiple different reasons to kick the ilb, the above
construct also becomes a problem.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
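Concretely, the call site in trigger_load_balance() goes from an open-coded test-then-kick to a single unconditional call. A simplified before/after sketch lifted from the diff below (the rest of the function is elided):

/* Before: the caller decides whether a kick is needed. */
#ifdef CONFIG_NO_HZ_COMMON
	if (nohz_kick_needed(rq))
		nohz_balancer_kick();
#endif

/* After: the heuristic lives inside nohz_balancer_kick(), which calls
 * kick_ilb() itself when one of the kick conditions holds. */
	nohz_balancer_kick(rq);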
1 parent b7031a0 commit 4550487

1 file changed: +111, -107 lines changed

kernel/sched/fair.c

Lines changed: 111 additions & 107 deletions
@@ -9065,12 +9065,29 @@ static inline int find_new_ilb(void)
 	return nr_cpu_ids;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
+
+	if (!sd || !sd->nohz_idle)
+		goto unlock;
+	sd->nohz_idle = 0;
+
+	atomic_inc(&sd->shared->nr_busy_cpus);
+unlock:
+	rcu_read_unlock();
+}
+
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void kick_ilb(void)
 {
 	unsigned int flags;
 	int ilb_cpu;
@@ -9085,14 +9102,102 @@ static void nohz_balancer_kick(void)
 	flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
 	if (flags & NOHZ_KICK_MASK)
 		return;
+
 	/*
 	 * Use smp_send_reschedule() instead of resched_cpu().
 	 * This way we generate a sched IPI on the target CPU which
 	 * is idle. And the softirq performing nohz idle load balance
 	 * will be run before returning from the IPI.
 	 */
 	smp_send_reschedule(ilb_cpu);
-	return;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle cpu in the system.
+ *   - This rq has more than one task.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpu.
+ *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
+ *     domain span are idle.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+	unsigned long now = jiffies;
+	struct sched_domain_shared *sds;
+	struct sched_domain *sd;
+	int nr_busy, i, cpu = rq->cpu;
+	bool kick = false;
+
+	if (unlikely(rq->idle_balance))
+		return;
+
+	/*
+	 * We may be recently in ticked or tickless idle mode. At the first
+	 * busy tick after returning from idle, we will update the busy stats.
+	 */
+	set_cpu_sd_state_busy();
+	nohz_balance_exit_idle(cpu);
+
+	/*
+	 * None are in tickless mode and hence no need for NOHZ idle load
+	 * balancing.
+	 */
+	if (likely(!atomic_read(&nohz.nr_cpus)))
+		return;
+
+	if (time_before(now, nohz.next_balance))
+		return;
+
+	if (rq->nr_running >= 2) {
+		kick = true;
+		goto out;
+	}
+
+	rcu_read_lock();
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * XXX: write a coherent comment on why we do this.
+		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
+		if (nr_busy > 1) {
+			kick = true;
+			goto unlock;
+		}
+
+	}
+
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		if ((rq->cfs.h_nr_running >= 1) &&
+				check_cpu_capacity(rq, sd)) {
+			kick = true;
+			goto unlock;
+		}
+	}
+
+	sd = rcu_dereference(per_cpu(sd_asym, cpu));
+	if (sd) {
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (i == cpu ||
+			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
+				continue;
+
+			if (sched_asym_prefer(i, cpu)) {
+				kick = true;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	rcu_read_unlock();
+out:
+	if (kick)
+		kick_ilb();
 }
 
 void nohz_balance_exit_idle(unsigned int cpu)
@@ -9112,23 +9217,6 @@ void nohz_balance_exit_idle(unsigned int cpu)
 	}
 }
 
-static inline void set_cpu_sd_state_busy(void)
-{
-	struct sched_domain *sd;
-	int cpu = smp_processor_id();
-
-	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-	if (!sd || !sd->nohz_idle)
-		goto unlock;
-	sd->nohz_idle = 0;
-
-	atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
-	rcu_read_unlock();
-}
-
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
@@ -9171,6 +9259,8 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -9369,90 +9459,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 
 	return true;
 }
-
-/*
- * Current heuristic for kicking the idle load balancer in the presence
- * of an idle CPU in the system.
- *   - This rq has more than one task.
- *   - This rq has at least one CFS task and the capacity of the CPU is
- *     significantly reduced because of RT tasks or IRQs.
- *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
- *     multiple busy CPUs.
- *   - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler
- *     domain span are idle.
- */
-static inline bool nohz_kick_needed(struct rq *rq)
-{
-	unsigned long now = jiffies;
-	struct sched_domain_shared *sds;
-	struct sched_domain *sd;
-	int nr_busy, i, cpu = rq->cpu;
-	bool kick = false;
-
-	if (unlikely(rq->idle_balance))
-		return false;
-
-	/*
-	 * We may be recently in ticked or tickless idle mode. At the first
-	 * busy tick after returning from idle, we will update the busy stats.
-	 */
-	set_cpu_sd_state_busy();
-	nohz_balance_exit_idle(cpu);
-
-	/*
-	 * None are in tickless mode and hence no need for NOHZ idle load
-	 * balancing.
-	 */
-	if (likely(!atomic_read(&nohz.nr_cpus)))
-		return false;
-
-	if (time_before(now, nohz.next_balance))
-		return false;
-
-	if (rq->nr_running >= 2)
-		return true;
-
-	rcu_read_lock();
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
-		/*
-		 * XXX: write a coherent comment on why we do this.
-		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
-		 */
-		nr_busy = atomic_read(&sds->nr_busy_cpus);
-		if (nr_busy > 1) {
-			kick = true;
-			goto unlock;
-		}
-
-	}
-
-	sd = rcu_dereference(rq->sd);
-	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-				check_cpu_capacity(rq, sd)) {
-			kick = true;
-			goto unlock;
-		}
-	}
-
-	sd = rcu_dereference(per_cpu(sd_asym, cpu));
-	if (sd) {
-		for_each_cpu(i, sched_domain_span(sd)) {
-			if (i == cpu ||
-			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
-				continue;
-
-			if (sched_asym_prefer(i, cpu)) {
-				kick = true;
-				goto unlock;
-			}
-		}
-	}
-unlock:
-	rcu_read_unlock();
-	return kick;
-}
 #else
 static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 {
@@ -9497,10 +9503,8 @@ void trigger_load_balance(struct rq *rq)
 
 	if (time_after_eq(jiffies, rq->next_balance))
 		raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ_COMMON
-	if (nohz_kick_needed(rq))
-		nohz_balancer_kick();
-#endif
+
+	nohz_balancer_kick(rq);
 }
 
 static void rq_online_fair(struct rq *rq)
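One design note on the new #else stub: providing an empty nohz_balancer_kick() for !CONFIG_NO_HZ_COMMON builds is what lets trigger_load_balance() drop its #ifdef and call the function unconditionally. The same pattern in a self-contained, hypothetical example (CONFIG_FEATURE and feature_kick() are illustrative names, not kernel code):

#include <stdio.h>

#define CONFIG_FEATURE 1			/* flip to 0 to build the stub variant */

#if CONFIG_FEATURE
static void feature_kick(int cpu)		/* "real" implementation */
{
	printf("kicking CPU %d\n", cpu);
}
#else
static inline void feature_kick(int cpu)	/* empty stub, compiles away */
{
	(void)cpu;
}
#endif

int main(void)
{
	feature_kick(3);	/* call site needs no #if guard either way */
	return 0;
}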
