@@ -4323,6 +4323,189 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+/*
+ * per rq 'load' array crap; XXX kill this.
+ */
+
+/*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on nth tick when cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to the degradation factor for a power of two ticks,
+ * based on the 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of the exact factor 3^8/4^8).
+ *
+ * With these power of 2 load factors, we can degrade the load n times
+ * by looking at the 1 bits in n and doing as many mult/shifts instead of
+ * the n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT		7
+static const unsigned char
+		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+					{0, 0, 0, 0, 0, 0, 0, 0},
+					{64, 32, 8, 0, 0, 0, 0, 0},
+					{96, 72, 40, 12, 1, 0, 0},
+					{112, 98, 75, 43, 15, 1, 0},
+					{120, 112, 98, 76, 45, 16, 2} };
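
For reference, the table values above follow from the 128-point scale described in the comment: entry [idx][col] is floor(128 * ((2^idx - 1) / 2^idx)^(2^col)), with trailing zero columns simply omitted from the shorter rows. Below is a standalone sketch, not part of the patch, that recomputes the table; it borrows CPU_LOAD_IDX_MAX = 5 and DEGRADE_SHIFT = 7 from the code above.

```c
/* Standalone sketch: recompute the degrade_factor table on the 128-point
 * scale, i.e. floor(128 * ((scale - 1) / scale)^(2^col)) with scale = 2^idx.
 * CPU_LOAD_IDX_MAX and DEGRADE_SHIFT are taken from the patch above.
 */
#include <math.h>
#include <stdio.h>

#define CPU_LOAD_IDX_MAX	5
#define DEGRADE_SHIFT		7

int main(void)
{
	/* Row 0 (idx 0) is never decayed via the table, so start at idx 1. */
	for (int idx = 1; idx < CPU_LOAD_IDX_MAX; idx++) {
		double scale = (double)(1 << idx);	/* 2^idx */
		double factor = (scale - 1.0) / scale;	/* (2^idx - 1) / 2^idx */

		for (int col = 0; col <= DEGRADE_SHIFT; col++) {
			unsigned long ticks = 1UL << col;	/* 2^col missed ticks */

			/* floor(128 * factor^ticks); the tiny epsilon guards
			 * against pow() landing just under an exact integer. */
			printf("%4d", (int)(128.0 * pow(factor, (double)ticks) + 1e-9));
		}
		printf("\n");
	}
	return 0;
}
```

Truncation rather than rounding reproduces the stored values, e.g. row 2, col 3: 128 * (3/4)^8 ≈ 12.8, kept as 12.
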
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+	int j = 0;
+
+	if (!missed_updates)
+		return load;
+
+	if (missed_updates >= degrade_zero_ticks[idx])
+		return 0;
+
+	if (idx == 1)
+		return load >> missed_updates;
+
+	while (missed_updates) {
+		if (missed_updates % 2)
+			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+		missed_updates >>= 1;
+		j++;
+	}
+	return load;
+}
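
The loop above applies one table lookup per set bit of missed_updates, which is the mult/shift-per-1-bit shortcut the earlier comment describes: at most DEGRADE_SHIFT + 1 multiplications instead of n. A self-contained sketch follows (table and decay_load_missed() copied from the patch; naive_decay() is a hypothetical reference helper, not kernel code) comparing the result against the exact n-step decay.

```c
/* Sketch: compare the table-driven decay against the exact n-step decay
 * load = load * (2^idx - 1) / 2^idx applied missed_updates times.
 */
#include <stdio.h>

#define CPU_LOAD_IDX_MAX	5
#define DEGRADE_SHIFT		7

static const unsigned char
		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
static const unsigned char
		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
					{0, 0, 0, 0, 0, 0, 0, 0},
					{64, 32, 8, 0, 0, 0, 0, 0},
					{96, 72, 40, 12, 1, 0, 0},
					{112, 98, 75, 43, 15, 1, 0},
					{120, 112, 98, 76, 45, 16, 2} };

/* Copied verbatim from the patch above. */
static unsigned long
decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
{
	int j = 0;

	if (!missed_updates)
		return load;
	if (missed_updates >= degrade_zero_ticks[idx])
		return 0;
	if (idx == 1)
		return load >> missed_updates;

	while (missed_updates) {
		if (missed_updates % 2)
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		missed_updates >>= 1;
		j++;
	}
	return load;
}

/* Exact decay done the slow way, in floating point, for comparison. */
static double naive_decay(double load, unsigned long n, int idx)
{
	double factor = (double)((1 << idx) - 1) / (1 << idx);

	while (n--)
		load *= factor;
	return load;
}

int main(void)
{
	unsigned long load = 1024;

	for (int idx = 1; idx < CPU_LOAD_IDX_MAX; idx++)
		for (unsigned long n = 1; n <= 10; n++)
			printf("idx=%d n=%2lu table=%4lu exact=%7.1f\n",
			       idx, n, decay_load_missed(load, n, idx),
			       naive_decay((double)load, n, idx));
	return 0;
}
```

The table-driven value tracks the exact decay to within the 1/128 resolution of the scale; note that for idx 1 anything at or past degrade_zero_ticks[1] = 8 missed ticks is approximated straight to zero.
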
+
+/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
+ */
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+			      unsigned long pending_updates)
+{
+	int i, scale;
+
+	this_rq->nr_load_updates++;
+
+	/* Update our load: */
+	this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+		unsigned long old_load, new_load;
+
+		/* scale is effectively 1 << i now, and >> i divides by scale */
+
+		old_load = this_rq->cpu_load[i];
+		old_load = decay_load_missed(old_load, pending_updates - 1, i);
+		new_load = this_load;
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale - 1;
+
+		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
+	}
+
+	sched_avg_update(this_rq);
+}
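
The new_load += scale - 1 adjustment exists because the average is taken with a truncating right shift. A minimal sketch (hypothetical ema_step() helper; values chosen to match the 9-vs-10 example in the comment) shows the idx 1 behaviour with and without it:

```c
/* Sketch: the per-index update cpu_load[i] = (old * (scale - 1) + new) >> i,
 * with and without the round-up from the patch. Values are hypothetical.
 */
#include <stdio.h>

static unsigned long ema_step(unsigned long old_load, unsigned long new_load,
			      int idx, int round_up)
{
	unsigned long scale = 1UL << idx;

	if (round_up && new_load > old_load)
		new_load += scale - 1;	/* round the division up when rising */

	return (old_load * (scale - 1) + new_load) >> idx;
}

int main(void)
{
	unsigned long load = 0, target = 10;

	/* Without rounding, the average converges to 9 and stays there,
	 * since (9 * 1 + 10) >> 1 == 9.  With rounding it reaches 10.
	 */
	for (int tick = 0; tick < 16; tick++)
		load = ema_step(load, target, 1, 0);
	printf("without round-up: %lu\n", load);	/* prints 9 */

	load = 0;
	for (int tick = 0; tick < 16; tick++)
		load = ema_step(load, target, 1, 1);
	printf("with round-up:    %lu\n", load);	/* prints 10 */

	return 0;
}
```
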
+
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading,
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+static void update_idle_cpu_load(struct rq *this_rq)
+{
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long load = this_rq->cfs.runnable_load_avg;
+	unsigned long pending_updates;
+
+	/*
+	 * Bail if there's load or we're actually up-to-date.
+	 */
+	if (load || curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	this_rq->last_load_update_tick = curr_jiffies;
+
+	__update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+	struct rq *this_rq = this_rq();
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long pending_updates;
+
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	raw_spin_lock(&this_rq->lock);
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * We were idle, which means load 0; the current load might be
+		 * !0 due to remote wakeups and the like.
+		 */
+		__update_cpu_load(this_rq, 0, pending_updates);
+	}
+	raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ_COMMON */
+
+/*
+ * Called from scheduler_tick()
+ */
+void update_cpu_load_active(struct rq *this_rq)
+{
+	unsigned long load = this_rq->cfs.runnable_load_avg;
+	/*
+	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+	 */
+	this_rq->last_load_update_tick = jiffies;
+	__update_cpu_load(this_rq, load, 1);
+}
+
 /* Used instead of source_load when we know the type == 0 */
 static unsigned long weighted_cpuload(const int cpu)
 {