Commit 3289bdb

Peter Zijlstra authored and Ingo Molnar committed
sched: Move the loadavg code to a more obvious location
I could not find the loadavg code.. turns out it was hidden in a file called proc.c. It further got mingled up with the cruft per rq load indexes (which we really want to get rid of).

Move the per rq load indexes into the fair.c load-balance code (that's the only thing that uses them) and rename proc.c to loadavg.c so we can find it again.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
[ Did minor cleanups to the code. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent bb2ebf0 commit 3289bdb

6 files changed: +222 −219 lines

include/linux/sched.h

Lines changed: 5 additions & 0 deletions
@@ -173,7 +173,12 @@ extern unsigned long nr_iowait_cpu(int cpu);
 extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 extern void calc_global_load(unsigned long ticks);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 extern void update_cpu_load_nohz(void);
+#else
+static inline void update_cpu_load_nohz(void) { }
+#endif
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
kernel/sched/Makefile

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
 endif
 
-obj-y += core.o proc.o clock.o cputime.o
+obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o completion.o idle.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o

kernel/sched/core.c

Lines changed: 4 additions & 3 deletions
@@ -2397,9 +2397,9 @@ unsigned long nr_iowait_cpu(int cpu)
 
 void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
 {
-	struct rq *this = this_rq();
-	*nr_waiters = atomic_read(&this->nr_iowait);
-	*load = this->cpu_load[0];
+	struct rq *rq = this_rq();
+	*nr_waiters = atomic_read(&rq->nr_iowait);
+	*load = rq->load.weight;
 }
 
 #ifdef CONFIG_SMP
@@ -2497,6 +2497,7 @@ void scheduler_tick(void)
 	update_rq_clock(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
 	update_cpu_load_active(rq);
+	calc_global_load_tick(rq);
 	raw_spin_unlock(&rq->lock);
 
 	perf_event_task_tick();

kernel/sched/fair.c

Lines changed: 183 additions & 0 deletions
@@ -4323,6 +4323,189 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+/*
+ * per rq 'load' arrray crap; XXX kill this.
+ */
+
+/*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets called
+ * on nth tick when cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = (2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to degradation factor for a power of two ticks,
+ * based on 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
+ *
+ * With this power of 2 load factors, we can degrade the load n times
+ * by looking at 1 bits in n and doing as many mult/shift instead of
+ * n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT		7
+static const unsigned char
+		degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+		degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+					{0, 0, 0, 0, 0, 0, 0, 0},
+					{64, 32, 8, 0, 0, 0, 0, 0},
+					{96, 72, 40, 12, 1, 0, 0},
+					{112, 98, 75, 43, 15, 1, 0},
+					{120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks, due to tickless idle. The backlog
+ * would be when CPU is idle and so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+	int j = 0;
+
+	if (!missed_updates)
+		return load;
+
+	if (missed_updates >= degrade_zero_ticks[idx])
+		return 0;
+
+	if (idx == 1)
+		return load >> missed_updates;
+
+	while (missed_updates) {
+		if (missed_updates % 2)
+			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+		missed_updates >>= 1;
+		j++;
+	}
+	return load;
+}
+
+/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
+ */
+static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
+			      unsigned long pending_updates)
+{
+	int i, scale;
+
+	this_rq->nr_load_updates++;
+
+	/* Update our load: */
+	this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+	for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+		unsigned long old_load, new_load;
+
+		/* scale is effectively 1 << i now, and >> i divides by scale */
+
+		old_load = this_rq->cpu_load[i];
+		old_load = decay_load_missed(old_load, pending_updates - 1, i);
+		new_load = this_load;
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale - 1;
+
+		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
+	}
+
+	sched_avg_update(this_rq);
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
+/*
+ * Called from nohz_idle_balance() to update the load ratings before doing the
+ * idle balance.
+ */
+static void update_idle_cpu_load(struct rq *this_rq)
+{
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long load = this_rq->cfs.runnable_load_avg;
+	unsigned long pending_updates;
+
+	/*
+	 * bail if there's load or we're actually up-to-date.
+	 */
+	if (load || curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	this_rq->last_load_update_tick = curr_jiffies;
+
+	__update_cpu_load(this_rq, load, pending_updates);
+}
+
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+	struct rq *this_rq = this_rq();
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long pending_updates;
+
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	raw_spin_lock(&this_rq->lock);
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * We were idle, this means load 0, the current load might be
+		 * !0 due to remote wakeups and the sort.
+		 */
+		__update_cpu_load(this_rq, 0, pending_updates);
+	}
+	raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
+ * Called from scheduler_tick()
+ */
+void update_cpu_load_active(struct rq *this_rq)
+{
+	unsigned long load = this_rq->cfs.runnable_load_avg;
+	/*
+	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
+	 */
+	this_rq->last_load_update_tick = jiffies;
+	__update_cpu_load(this_rq, load, 1);
+}
+
 /* Used instead of source_load when we know the type == 0 */
 static unsigned long weighted_cpuload(const int cpu)
 {
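For readers skimming the decay_load_missed() code moved in this diff: the precomputed degrade_factor table lets the scheduler apply n ticks' worth of per-tick decay load *= (2^idx - 1) / 2^idx with at most one multiply/shift per set bit of n, on a 128-point (7-bit) scale. For instance, row 2, col 3 (=12) is 128 * (3/4)^8 ≈ 12.8 rounded down. Below is a minimal userspace sketch (not part of this commit) that copies the table and the bit-walk loop from the diff and compares the result against the exact value load * ((2^idx - 1) / 2^idx)^n; the main() driver and its sample numbers (load = 1000, idx = 2, n = 9) are illustrative only. Build with something like: gcc decay_sketch.c -lm.

/* Userspace sketch only: mirrors decay_load_missed() from the diff above. */
#include <stdio.h>
#include <math.h>

#define CPU_LOAD_IDX_MAX	5
#define DEGRADE_SHIFT		7

static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
	{0, 0, 0, 0, 0, 0, 0, 0},
	{64, 32, 8, 0, 0, 0, 0, 0},
	{96, 72, 40, 12, 1, 0, 0},
	{112, 98, 75, 43, 15, 1, 0},
	{120, 112, 98, 76, 45, 16, 2} };

/* Same bit-walk as the kernel function: one mult/shift per set bit of n. */
static unsigned long decay_load_missed(unsigned long load,
				       unsigned long missed_updates, int idx)
{
	int j = 0;

	if (!missed_updates)
		return load;
	if (missed_updates >= degrade_zero_ticks[idx])
		return 0;
	if (idx == 1)
		return load >> missed_updates;

	while (missed_updates) {
		if (missed_updates % 2)
			load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
		missed_updates >>= 1;
		j++;
	}
	return load;
}

int main(void)
{
	unsigned long load = 1000;	/* illustrative sample values */
	int idx = 2, n = 9;		/* 9 = 8 + 1, so table columns 3 and 0 are used */

	/* exact decay: load * ((2^idx - 1) / 2^idx)^n */
	double exact = load * pow(((1 << idx) - 1) / (double)(1 << idx), n);

	printf("table approximation: %lu, exact: %.1f\n",
	       decay_load_missed(load, n, idx), exact);
	return 0;
}

With these sample inputs the table-based path prints 70 against an exact value of about 75.1, which illustrates the intended trade-off: a small approximation error in exchange for doing only popcount(n) multiply/shifts instead of n of them.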

0 commit comments
