Commit af3fe03

Peter Zijlstra authored and Ingo Molnar committed
sched/fair: Move rebalance_domains()
This pure code movement results in two #ifdef CONFIG_NO_HZ_COMMON
sections landing next to each other.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 6392838 commit af3fe03
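Context note (editorial, not part of the commit): the moved block used to sit between the #endif that closes the nohz kick stubs and the #ifdef CONFIG_NO_HZ_COMMON that guards the idle-balance code, so deleting it there leaves the file reading roughly as follows — a sketch assembled from the context lines in the hunks below, not the full file:

	static inline void nohz_balancer_kick(struct rq *rq) { }
	#endif

	#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the

With the two preprocessor sections now adjacent, a later cleanup can presumably fold them into one.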

1 file changed

kernel/sched/fair.c: 118 additions & 118 deletions
@@ -9121,6 +9121,124 @@ static int active_load_balance_cpu_stop(void *data)
 	return 0;
 }
 
+static DEFINE_SPINLOCK(balancing);
+
+/*
+ * Scale the max load_balance interval with the number of CPUs in the system.
+ * This trades load-balance latency on larger machines for less cross talk.
+ */
+void update_max_interval(void)
+{
+	max_load_balance_interval = HZ*num_online_cpus()/10;
+}
+
+/*
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+ *
+ * Balancing parameters are set up in init_sched_domains.
+ */
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+{
+	int continue_balancing = 1;
+	int cpu = rq->cpu;
+	unsigned long interval;
+	struct sched_domain *sd;
+	/* Earliest time when we have to do rebalance again */
+	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
+		if (!(sd->flags & SD_LOAD_BALANCE))
+			continue;
+
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+
+		need_serialize = sd->flags & SD_SERIALIZE;
+		if (need_serialize) {
+			if (!spin_trylock(&balancing))
+				goto out;
+		}
+
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
+			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
+				/*
+				 * The LBF_DST_PINNED logic could have changed
+				 * env->dst_cpu, so we can't know our idle
+				 * state even if we migrated tasks. Update it.
+				 */
+				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+			}
+			sd->last_balance = jiffies;
+			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+		}
+		if (need_serialize)
+			spin_unlock(&balancing);
+out:
+		if (time_after(next_balance, sd->last_balance + interval)) {
+			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
+	}
+	if (need_decay) {
+		/*
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
+		 */
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
+	}
+	rcu_read_unlock();
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the CPU is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance)) {
+		rq->next_balance = next_balance;
+
+#ifdef CONFIG_NO_HZ_COMMON
+		/*
+		 * If this CPU has been elected to perform the nohz idle
+		 * balance. Other idle CPUs have already rebalanced with
+		 * nohz_idle_balance() and nohz.next_balance has been
+		 * updated accordingly. This CPU is now running the idle load
+		 * balance for itself and we need to update the
+		 * nohz.next_balance accordingly.
+		 */
+		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+			nohz.next_balance = rq->next_balance;
+#endif
+	}
+}
+
 static inline int on_null_domain(struct rq *rq)
 {
 	return unlikely(!rcu_dereference_sched(rq->sd));
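Side note (editorial, not part of the commit): the "Decay ~1% per second" comment in the hunk above is easy to verify against the code it annotates:

	/*
	 * Each decay pass computes max_newidle_lb_cost * 253 / 256,
	 * i.e. it drops 3/256 ≈ 1.2% of the value, and
	 * next_decay_max_lb_cost = jiffies + HZ limits passes to
	 * roughly one per second — hence "~1% per second".
	 */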
@@ -9373,124 +9491,6 @@ void nohz_balance_enter_idle(int cpu)
 static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
-static DEFINE_SPINLOCK(balancing);
-
-/*
- * Scale the max load_balance interval with the number of CPUs in the system.
- * This trades load-balance latency on larger machines for less cross talk.
- */
-void update_max_interval(void)
-{
-	max_load_balance_interval = HZ*num_online_cpus()/10;
-}
-
-/*
- * It checks each scheduling domain to see if it is due to be balanced,
- * and initiates a balancing operation if so.
- *
- * Balancing parameters are set up in init_sched_domains.
- */
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
-{
-	int continue_balancing = 1;
-	int cpu = rq->cpu;
-	unsigned long interval;
-	struct sched_domain *sd;
-	/* Earliest time when we have to do rebalance again */
-	unsigned long next_balance = jiffies + 60*HZ;
-	int update_next_balance = 0;
-	int need_serialize, need_decay = 0;
-	u64 max_cost = 0;
-
-	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		/*
-		 * Decay the newidle max times here because this is a regular
-		 * visit to all the domains. Decay ~1% per second.
-		 */
-		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-			sd->max_newidle_lb_cost =
-				(sd->max_newidle_lb_cost * 253) / 256;
-			sd->next_decay_max_lb_cost = jiffies + HZ;
-			need_decay = 1;
-		}
-		max_cost += sd->max_newidle_lb_cost;
-
-		if (!(sd->flags & SD_LOAD_BALANCE))
-			continue;
-
-		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
-		 */
-		if (!continue_balancing) {
-			if (need_decay)
-				continue;
-			break;
-		}
-
-		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-
-		need_serialize = sd->flags & SD_SERIALIZE;
-		if (need_serialize) {
-			if (!spin_trylock(&balancing))
-				goto out;
-		}
-
-		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
-				/*
-				 * The LBF_DST_PINNED logic could have changed
-				 * env->dst_cpu, so we can't know our idle
-				 * state even if we migrated tasks. Update it.
-				 */
-				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
-			}
-			sd->last_balance = jiffies;
-			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-		}
-		if (need_serialize)
-			spin_unlock(&balancing);
-out:
-		if (time_after(next_balance, sd->last_balance + interval)) {
-			next_balance = sd->last_balance + interval;
-			update_next_balance = 1;
-		}
-	}
-	if (need_decay) {
-		/*
-		 * Ensure the rq-wide value also decays but keep it at a
-		 * reasonable floor to avoid funnies with rq->avg_idle.
-		 */
-		rq->max_idle_balance_cost =
-			max((u64)sysctl_sched_migration_cost, max_cost);
-	}
-	rcu_read_unlock();
-
-	/*
-	 * next_balance will be updated only when there is a need.
-	 * When the CPU is attached to null domain for ex, it will not be
-	 * updated.
-	 */
-	if (likely(update_next_balance)) {
-		rq->next_balance = next_balance;
-
-#ifdef CONFIG_NO_HZ_COMMON
-		/*
-		 * If this CPU has been elected to perform the nohz idle
-		 * balance. Other idle CPUs have already rebalanced with
-		 * nohz_idle_balance() and nohz.next_balance has been
-		 * updated accordingly. This CPU is now running the idle load
-		 * balance for itself and we need to update the
-		 * nohz.next_balance accordingly.
-		 */
-		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
-			nohz.next_balance = rq->next_balance;
-#endif
-	}
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
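A quick, illustrative check of update_max_interval() from the hunks above (the HZ value is an assumption for the example, not something stated in the commit):

	/*
	 * max_load_balance_interval = HZ * num_online_cpus() / 10
	 * Assuming HZ=250:
	 *     8 CPUs ->  200 jiffies  (0.8 s)
	 *    64 CPUs -> 1600 jiffies  (6.4 s)
	 *   128 CPUs -> 3200 jiffies (12.8 s)
	 * Larger machines rebalance less often: load-balance latency is
	 * traded for less cross talk, as the function's comment says.
	 */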
