@@ -9121,6 +9121,124 @@ static int active_load_balance_cpu_stop(void *data)
 	return 0;
 }
 
+static DEFINE_SPINLOCK(balancing);
+
+/*
+ * Scale the max load_balance interval with the number of CPUs in the system.
+ * This trades load-balance latency on larger machines for less cross talk.
+ */
+void update_max_interval(void)
+{
+	max_load_balance_interval = HZ*num_online_cpus()/10;
+}
+
+/*
+ * It checks each scheduling domain to see if it is due to be balanced,
+ * and initiates a balancing operation if so.
+ *
+ * Balancing parameters are set up in init_sched_domains.
+ */
+static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
+{
+	int continue_balancing = 1;
+	int cpu = rq->cpu;
+	unsigned long interval;
+	struct sched_domain *sd;
+	/* Earliest time when we have to do rebalance again */
+	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
+	int need_serialize, need_decay = 0;
+	u64 max_cost = 0;
+
+	rcu_read_lock();
+	for_each_domain(cpu, sd) {
+		/*
+		 * Decay the newidle max times here because this is a regular
+		 * visit to all the domains. Decay ~1% per second.
+		 */
+		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+			sd->max_newidle_lb_cost =
+				(sd->max_newidle_lb_cost * 253) / 256;
+			sd->next_decay_max_lb_cost = jiffies + HZ;
+			need_decay = 1;
+		}
+		max_cost += sd->max_newidle_lb_cost;
+
+		if (!(sd->flags & SD_LOAD_BALANCE))
+			continue;
+
+		/*
+		 * Stop the load balance at this level. There is another
+		 * CPU in our sched group which is doing load balancing more
+		 * actively.
+		 */
+		if (!continue_balancing) {
+			if (need_decay)
+				continue;
+			break;
+		}
+
+		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+
+		need_serialize = sd->flags & SD_SERIALIZE;
+		if (need_serialize) {
+			if (!spin_trylock(&balancing))
+				goto out;
+		}
+
+		if (time_after_eq(jiffies, sd->last_balance + interval)) {
+			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
+				/*
+				 * The LBF_DST_PINNED logic could have changed
+				 * env->dst_cpu, so we can't know our idle
+				 * state even if we migrated tasks. Update it.
+				 */
+				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+			}
+			sd->last_balance = jiffies;
+			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+		}
+		if (need_serialize)
+			spin_unlock(&balancing);
+out:
+		if (time_after(next_balance, sd->last_balance + interval)) {
+			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
+	}
+	if (need_decay) {
+		/*
+		 * Ensure the rq-wide value also decays but keep it at a
+		 * reasonable floor to avoid funnies with rq->avg_idle.
+		 */
+		rq->max_idle_balance_cost =
+			max((u64)sysctl_sched_migration_cost, max_cost);
+	}
+	rcu_read_unlock();
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the CPU is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance)) {
+		rq->next_balance = next_balance;
+
+#ifdef CONFIG_NO_HZ_COMMON
+		/*
+		 * If this CPU has been elected to perform the nohz idle
+		 * balance. Other idle CPUs have already rebalanced with
+		 * nohz_idle_balance() and nohz.next_balance has been
+		 * updated accordingly. This CPU is now running the idle load
+		 * balance for itself and we need to update the
+		 * nohz.next_balance accordingly.
+		 */
+		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
+			nohz.next_balance = rq->next_balance;
+#endif
+	}
+}
+
 static inline int on_null_domain(struct rq *rq)
 {
 	return unlikely(!rcu_dereference_sched(rq->sd));
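
For a sense of the scale update_max_interval() picks: with HZ*num_online_cpus()/10, the cap on the balance interval grows linearly with the online CPU count. A minimal userspace sketch of just that formula, assuming HZ=250 purely for illustration (HZ is a kernel build-time constant, and the max_interval() helper below is hypothetical):

#include <stdio.h>

#define HZ 250	/* assumed tick rate, for illustration only */

/* Hypothetical helper mirroring the formula in update_max_interval() */
static unsigned long max_interval(unsigned int online_cpus)
{
	return HZ * online_cpus / 10;
}

int main(void)
{
	unsigned int cpus[] = { 2, 8, 64, 256 };

	for (unsigned int i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++)
		printf("%3u CPUs -> %5lu jiffies (%5.1f s)\n",
		       cpus[i], max_interval(cpus[i]),
		       (double)max_interval(cpus[i]) / HZ);
	return 0;
}

Under these assumptions a 2-CPU box caps the interval at 0.2 s while a 256-CPU machine stretches it to 25.6 s, which is exactly the latency-for-less-cross-talk trade the comment describes.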
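
The (cost * 253) / 256 step in rebalance_domains() is a fixed-point decay: applied once per second it removes about 1.2% (253/256 is roughly 0.9883), so a stale max_newidle_lb_cost halves in roughly ln(2)/ln(256/253), about 59 seconds. A standalone sketch of just that arithmetic, with an arbitrary example starting cost:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cost = 500000;	/* arbitrary example cost, in ns */

	for (int sec = 1; sec <= 120; sec++) {
		/* same fixed-point decay as rebalance_domains() */
		cost = (cost * 253) / 256;
		if (sec % 30 == 0)
			printf("after %3d s: %8llu ns\n",
			       sec, (unsigned long long)cost);
	}
	return 0;
}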
@@ -9373,124 +9491,6 @@ void nohz_balance_enter_idle(int cpu)
 static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
-static DEFINE_SPINLOCK(balancing);
-
-/*
- * Scale the max load_balance interval with the number of CPUs in the system.
- * This trades load-balance latency on larger machines for less cross talk.
- */
-void update_max_interval(void)
-{
-	max_load_balance_interval = HZ*num_online_cpus()/10;
-}
-
-/*
- * It checks each scheduling domain to see if it is due to be balanced,
- * and initiates a balancing operation if so.
- *
- * Balancing parameters are set up in init_sched_domains.
- */
-static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
-{
-	int continue_balancing = 1;
-	int cpu = rq->cpu;
-	unsigned long interval;
-	struct sched_domain *sd;
-	/* Earliest time when we have to do rebalance again */
-	unsigned long next_balance = jiffies + 60*HZ;
-	int update_next_balance = 0;
-	int need_serialize, need_decay = 0;
-	u64 max_cost = 0;
-
-	rcu_read_lock();
-	for_each_domain(cpu, sd) {
-		/*
-		 * Decay the newidle max times here because this is a regular
-		 * visit to all the domains. Decay ~1% per second.
-		 */
-		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-			sd->max_newidle_lb_cost =
-				(sd->max_newidle_lb_cost * 253) / 256;
-			sd->next_decay_max_lb_cost = jiffies + HZ;
-			need_decay = 1;
-		}
-		max_cost += sd->max_newidle_lb_cost;
-
-		if (!(sd->flags & SD_LOAD_BALANCE))
-			continue;
-
-		/*
-		 * Stop the load balance at this level. There is another
-		 * CPU in our sched group which is doing load balancing more
-		 * actively.
-		 */
-		if (!continue_balancing) {
-			if (need_decay)
-				continue;
-			break;
-		}
-
-		interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-
-		need_serialize = sd->flags & SD_SERIALIZE;
-		if (need_serialize) {
-			if (!spin_trylock(&balancing))
-				goto out;
-		}
-
-		if (time_after_eq(jiffies, sd->last_balance + interval)) {
-			if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
-				/*
-				 * The LBF_DST_PINNED logic could have changed
-				 * env->dst_cpu, so we can't know our idle
-				 * state even if we migrated tasks. Update it.
-				 */
-				idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
-			}
-			sd->last_balance = jiffies;
-			interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
-		}
-		if (need_serialize)
-			spin_unlock(&balancing);
-out:
-		if (time_after(next_balance, sd->last_balance + interval)) {
-			next_balance = sd->last_balance + interval;
-			update_next_balance = 1;
-		}
-	}
-	if (need_decay) {
-		/*
-		 * Ensure the rq-wide value also decays but keep it at a
-		 * reasonable floor to avoid funnies with rq->avg_idle.
-		 */
-		rq->max_idle_balance_cost =
-			max((u64)sysctl_sched_migration_cost, max_cost);
-	}
-	rcu_read_unlock();
-
-	/*
-	 * next_balance will be updated only when there is a need.
-	 * When the CPU is attached to null domain for ex, it will not be
-	 * updated.
-	 */
-	if (likely(update_next_balance)) {
-		rq->next_balance = next_balance;
-
-#ifdef CONFIG_NO_HZ_COMMON
-		/*
-		 * If this CPU has been elected to perform the nohz idle
-		 * balance. Other idle CPUs have already rebalanced with
-		 * nohz_idle_balance() and nohz.next_balance has been
-		 * updated accordingly. This CPU is now running the idle load
-		 * balance for itself and we need to update the
-		 * nohz.next_balance accordingly.
-		 */
-		if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
-			nohz.next_balance = rq->next_balance;
-#endif
-	}
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
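
The SD_SERIALIZE handling moved above relies on spin_trylock(&balancing) rather than spin_lock(): if another CPU is already balancing a serialized (e.g. NUMA-level) domain, this CPU skips the level via goto out instead of waiting. A userspace sketch of that skip-don't-wait pattern using pthreads (the mutex and balance_serialized_domain() below are illustrative stand-ins, not kernel APIs; build with -pthread):

#include <pthread.h>
#include <stdio.h>

/* Stands in for the file-scope "balancing" spinlock above */
static pthread_mutex_t balancing = PTHREAD_MUTEX_INITIALIZER;

/* Hypothetical stand-in for balancing one serialized domain */
static void balance_serialized_domain(int cpu)
{
	if (pthread_mutex_trylock(&balancing) != 0) {
		/* Lock is held elsewhere: skip instead of spinning */
		printf("cpu%d: busy, skipping this round\n", cpu);
		return;
	}
	printf("cpu%d: balancing serialized domain\n", cpu);
	pthread_mutex_unlock(&balancing);
}

int main(void)
{
	balance_serialized_domain(0);	/* uncontended: takes the lock */

	pthread_mutex_lock(&balancing);	/* simulate another CPU holding it */
	balance_serialized_domain(1);	/* contended: trylock fails, skips */
	pthread_mutex_unlock(&balancing);
	return 0;
}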