@@ -9065,12 +9065,29 @@ static inline int find_new_ilb(void)
 	return nr_cpu_ids;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
+
+	if (!sd || !sd->nohz_idle)
+		goto unlock;
+	sd->nohz_idle = 0;
+
+	atomic_inc(&sd->shared->nr_busy_cpus);
+unlock:
+	rcu_read_unlock();
+}
+
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void kick_ilb(void)
 {
 	unsigned int flags;
 	int ilb_cpu;
@@ -9085,14 +9102,102 @@ static void nohz_balancer_kick(void)
 	flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
 	if (flags & NOHZ_KICK_MASK)
 		return;
+
 	/*
 	 * Use smp_send_reschedule() instead of resched_cpu().
 	 * This way we generate a sched IPI on the target CPU which
 	 * is idle. And the softirq performing nohz idle load balance
 	 * will be run before returning from the IPI.
 	 */
 	smp_send_reschedule(ilb_cpu);
-	return;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle CPU in the system.
+ *   - This rq has more than one task.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
+ *     multiple busy CPUs.
+ *   - For SD_ASYM_PACKING, if the lower-numbered CPUs in the scheduler
+ *     domain span are idle.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+	unsigned long now = jiffies;
+	struct sched_domain_shared *sds;
+	struct sched_domain *sd;
+	int nr_busy, i, cpu = rq->cpu;
+	bool kick = false;
+
+	if (unlikely(rq->idle_balance))
+		return;
+
+	/*
+	 * We may be recently in ticked or tickless idle mode. At the first
+	 * busy tick after returning from idle, we will update the busy stats.
+	 */
+	set_cpu_sd_state_busy();
+	nohz_balance_exit_idle(cpu);
+
+	/*
+	 * None are in tickless mode and hence no need for NOHZ idle load
+	 * balancing.
+	 */
+	if (likely(!atomic_read(&nohz.nr_cpus)))
+		return;
+
+	if (time_before(now, nohz.next_balance))
+		return;
+
+	if (rq->nr_running >= 2) {
+		kick = true;
+		goto out;
+	}
+
+	rcu_read_lock();
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * XXX: write a coherent comment on why we do this.
+		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
+		if (nr_busy > 1) {
+			kick = true;
+			goto unlock;
+		}
+
+	}
+
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		if ((rq->cfs.h_nr_running >= 1) &&
+		    check_cpu_capacity(rq, sd)) {
+			kick = true;
+			goto unlock;
+		}
+	}
+
+	sd = rcu_dereference(per_cpu(sd_asym, cpu));
+	if (sd) {
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (i == cpu ||
+			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
+				continue;
+
+			if (sched_asym_prefer(i, cpu)) {
+				kick = true;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	rcu_read_unlock();
+out:
+	if (kick)
+		kick_ilb();
 }
 
 void nohz_balance_exit_idle(unsigned int cpu)
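The hunk above folds the old nohz_kick_needed() test and the actual kick into a single nohz_balancer_kick(), which calls kick_ilb() only when one of the heuristics listed in its comment block holds. Below is a minimal standalone C sketch of that decision ordering, included here for illustration and not part of the patch: struct rq_model and its fields (llc_busy_cpus, capacity_reduced, nohz_idle_cpus) are simplified stand-ins rather than kernel types, and the jiffies throttle and the SD_ASYM_PACKING scan are omitted.

/* Standalone model of the kick-decision ordering used by nohz_balancer_kick().
 * Fields are simplified stand-ins for the kernel structures in the hunk above. */
#include <stdbool.h>
#include <stdio.h>

struct rq_model {
	bool idle_balance;		/* this CPU is itself idle-balancing */
	unsigned int nr_running;	/* total runnable tasks on this rq */
	unsigned int cfs_h_nr_running;	/* CFS tasks on this rq */
	bool capacity_reduced;		/* stand-in for check_cpu_capacity() */
	int llc_busy_cpus;		/* stand-in for sds->nr_busy_cpus */
	int nohz_idle_cpus;		/* stand-in for nohz.nr_cpus */
};

/* Mirrors the order of the checks in the new function: cheap bail-outs first,
 * then the conditions that justify waking an idle CPU to run the balancer. */
static bool should_kick_ilb(const struct rq_model *rq)
{
	if (rq->idle_balance)
		return false;		/* we are the idle balancer already */
	if (rq->nohz_idle_cpus == 0)
		return false;		/* nobody is tickless, nothing to kick */
	if (rq->nr_running >= 2)
		return true;		/* overloaded rq */
	if (rq->llc_busy_cpus > 1)
		return true;		/* busy siblings in the LLC domain */
	if (rq->cfs_h_nr_running >= 1 && rq->capacity_reduced)
		return true;		/* CFS work squeezed by RT/IRQ pressure */
	return false;
}

int main(void)
{
	struct rq_model rq = { .nr_running = 3, .nohz_idle_cpus = 2 };

	/* In the kernel, "yes" here leads to kick_ilb(), which sets
	 * NOHZ_KICK_MASK on the chosen idle CPU and sends it a sched IPI. */
	printf("kick ilb: %s\n", should_kick_ilb(&rq) ? "yes" : "no");
	return 0;
}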
@@ -9112,23 +9217,6 @@ void nohz_balance_exit_idle(unsigned int cpu)
 	}
 }
 
-static inline void set_cpu_sd_state_busy(void)
-{
-	struct sched_domain *sd;
-	int cpu = smp_processor_id();
-
-	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-	if (!sd || !sd->nohz_idle)
-		goto unlock;
-	sd->nohz_idle = 0;
-
-	atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
-	rcu_read_unlock();
-}
-
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
@@ -9171,6 +9259,8 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
 static DEFINE_SPINLOCK(balancing);
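The new #else branch supplies an empty nohz_balancer_kick() when CONFIG_NO_HZ_COMMON is disabled, which is what lets the final hunk drop the #ifdef around the call in trigger_load_balance(). For illustration only (not part of the patch), a small standalone sketch of the same stub-under-#else idiom, using a made-up FEATURE_NOHZ switch rather than the real kernel config symbol:

/* Stub-under-#else idiom: callers stay free of #ifdef because a no-op
 * version of the function always exists. FEATURE_NOHZ is illustrative. */
#include <stdio.h>

#ifdef FEATURE_NOHZ
static void feature_kick(int cpu)
{
	printf("kicking CPU %d\n", cpu);
}
#else
static inline void feature_kick(int cpu) { }	/* compiles away to nothing */
#endif

int main(void)
{
	/* No conditional compilation needed at the call site. */
	feature_kick(1);
	return 0;
}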
@@ -9369,90 +9459,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 
 	return true;
 }
-
-/*
- * Current heuristic for kicking the idle load balancer in the presence
- * of an idle CPU in the system.
- *   - This rq has more than one task.
- *   - This rq has at least one CFS task and the capacity of the CPU is
- *     significantly reduced because of RT tasks or IRQs.
- *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
- *     multiple busy CPUs.
- *   - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler
- *     domain span are idle.
- */
-static inline bool nohz_kick_needed(struct rq *rq)
-{
-	unsigned long now = jiffies;
-	struct sched_domain_shared *sds;
-	struct sched_domain *sd;
-	int nr_busy, i, cpu = rq->cpu;
-	bool kick = false;
-
-	if (unlikely(rq->idle_balance))
-		return false;
-
-	/*
-	 * We may be recently in ticked or tickless idle mode. At the first
-	 * busy tick after returning from idle, we will update the busy stats.
-	 */
-	set_cpu_sd_state_busy();
-	nohz_balance_exit_idle(cpu);
-
-	/*
-	 * None are in tickless mode and hence no need for NOHZ idle load
-	 * balancing.
-	 */
-	if (likely(!atomic_read(&nohz.nr_cpus)))
-		return false;
-
-	if (time_before(now, nohz.next_balance))
-		return false;
-
-	if (rq->nr_running >= 2)
-		return true;
-
-	rcu_read_lock();
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
-		/*
-		 * XXX: write a coherent comment on why we do this.
-		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
-		 */
-		nr_busy = atomic_read(&sds->nr_busy_cpus);
-		if (nr_busy > 1) {
-			kick = true;
-			goto unlock;
-		}
-
-	}
-
-	sd = rcu_dereference(rq->sd);
-	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-		    check_cpu_capacity(rq, sd)) {
-			kick = true;
-			goto unlock;
-		}
-	}
-
-	sd = rcu_dereference(per_cpu(sd_asym, cpu));
-	if (sd) {
-		for_each_cpu(i, sched_domain_span(sd)) {
-			if (i == cpu ||
-			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
-				continue;
-
-			if (sched_asym_prefer(i, cpu)) {
-				kick = true;
-				goto unlock;
-			}
-		}
-	}
-unlock:
-	rcu_read_unlock();
-	return kick;
-}
 #else
 static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 {
@@ -9497,10 +9503,8 @@ void trigger_load_balance(struct rq *rq)
 
 	if (time_after_eq(jiffies, rq->next_balance))
 		raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ_COMMON
-	if (nohz_kick_needed(rq))
-		nohz_balancer_kick();
-#endif
+
+	nohz_balancer_kick(rq);
 }
 
 static void rq_online_fair(struct rq *rq)