Commit 8295836

paulturner authored and Ingo Molnar committed
sched: Replace update_shares weight distribution with per-entity computation
Now that the machinery is in place to compute contributed load in a bottom-up
fashion, replace the shares distribution code within update_shares()
accordingly.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141507.061208672@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent f1b1728 commit 8295836
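
Before reading the diff, the shape of the change: the old code sampled cfs_rq
load over a sysctl_sched_shares_window, periodically folding the average into
tg->load_weight; with this patch, each cfs_rq publishes its per-entity-tracked
contribution (tg_load_contrib) into tg->load_avg, and the group-wide weight is
approximated on demand. A condensed C sketch of the new estimate, lifted from
the calc_tg_weight() hunk in kernel/sched/fair.c below (field layouts are as
introduced by this patch series):

    /* Approximate the group's total weight, preferring fresh local state. */
    static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
    {
            long tg_weight;

            tg_weight  = atomic64_read(&tg->load_avg);  /* sum of every cpu's contribution */
            tg_weight -= cfs_rq->tg_load_contrib;       /* drop our possibly stale share */
            tg_weight += cfs_rq->load.weight;           /* substitute our current weight */

            return tg_weight;
    }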

File tree

3 files changed, +36 -165 lines

kernel/sched/debug.c

Lines changed: 0 additions & 8 deletions
@@ -218,14 +218,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #ifdef CONFIG_SMP
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
-			SPLIT_NS(cfs_rq->load_avg));
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
-			SPLIT_NS(cfs_rq->load_period));
-	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
-			cfs_rq->load_contribution);
-	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
-			atomic_read(&cfs_rq->tg->load_weight));
 	SEQ_printf(m, "  .%-30s: %lld\n", "runnable_load_avg",
 			cfs_rq->runnable_load_avg);
 	SEQ_printf(m, "  .%-30s: %lld\n", "blocked_load_avg",

kernel/sched/fair.c

Lines changed: 24 additions & 133 deletions
@@ -658,9 +658,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
-static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
-static void update_cfs_shares(struct cfs_rq *cfs_rq);
-
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -680,10 +677,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
-
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-	cfs_rq->load_unacc_exec_time += delta_exec;
-#endif
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -806,72 +799,7 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/* we need this in update_cfs_load and load-balance functions below */
-static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 # ifdef CONFIG_SMP
-static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
-					    int global_update)
-{
-	struct task_group *tg = cfs_rq->tg;
-	long load_avg;
-
-	load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
-	load_avg -= cfs_rq->load_contribution;
-
-	if (global_update || abs(load_avg) > cfs_rq->load_contribution / 8) {
-		atomic_add(load_avg, &tg->load_weight);
-		cfs_rq->load_contribution += load_avg;
-	}
-}
-
-static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
-{
-	u64 period = sysctl_sched_shares_window;
-	u64 now, delta;
-	unsigned long load = cfs_rq->load.weight;
-
-	if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
-		return;
-
-	now = rq_of(cfs_rq)->clock_task;
-	delta = now - cfs_rq->load_stamp;
-
-	/* truncate load history at 4 idle periods */
-	if (cfs_rq->load_stamp > cfs_rq->load_last &&
-	    now - cfs_rq->load_last > 4 * period) {
-		cfs_rq->load_period = 0;
-		cfs_rq->load_avg = 0;
-		delta = period - 1;
-	}
-
-	cfs_rq->load_stamp = now;
-	cfs_rq->load_unacc_exec_time = 0;
-	cfs_rq->load_period += delta;
-	if (load) {
-		cfs_rq->load_last = now;
-		cfs_rq->load_avg += delta * load;
-	}
-
-	/* consider updating load contribution on each fold or truncate */
-	if (global_update || cfs_rq->load_period > period
-	    || !cfs_rq->load_period)
-		update_cfs_rq_load_contribution(cfs_rq, global_update);
-
-	while (cfs_rq->load_period > period) {
-		/*
-		 * Inline assembly required to prevent the compiler
-		 * optimising this loop into a divmod call.
-		 * See __iter_div_u64_rem() for another example of this.
-		 */
-		asm("" : "+rm" (cfs_rq->load_period));
-		cfs_rq->load_period /= 2;
-		cfs_rq->load_avg /= 2;
-	}
-
-	if (!cfs_rq->curr && !cfs_rq->nr_running && !cfs_rq->load_avg)
-		list_del_leaf_cfs_rq(cfs_rq);
-}
-
 static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
 {
 	long tg_weight;
@@ -881,8 +809,8 @@ static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
 	 * to gain a more accurate current total weight. See
 	 * update_cfs_rq_load_contribution().
 	 */
-	tg_weight = atomic_read(&tg->load_weight);
-	tg_weight -= cfs_rq->load_contribution;
+	tg_weight = atomic64_read(&tg->load_avg);
+	tg_weight -= cfs_rq->tg_load_contrib;
 	tg_weight += cfs_rq->load.weight;
 
 	return tg_weight;
@@ -906,27 +834,11 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 
 	return shares;
 }
-
-static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
-		update_cfs_load(cfs_rq, 0);
-		update_cfs_shares(cfs_rq);
-	}
-}
 # else /* CONFIG_SMP */
-static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
-{
-}
-
 static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 {
 	return tg->shares;
 }
-
-static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
-{
-}
 # endif /* CONFIG_SMP */
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 			    unsigned long weight)
@@ -944,6 +856,8 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	account_entity_enqueue(cfs_rq, se);
 }
 
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
+
 static void update_cfs_shares(struct cfs_rq *cfs_rq)
 {
 	struct task_group *tg;
@@ -963,17 +877,9 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq)
 	reweight_entity(cfs_rq_of(se), se, shares);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
-static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
-{
-}
-
 static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
 {
 }
-
-static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
-{
-}
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
@@ -1490,7 +1396,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
-	update_cfs_load(cfs_rq, 0);
 	enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
@@ -1587,7 +1492,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	se->on_rq = 0;
-	update_cfs_load(cfs_rq, 0);
 	account_entity_dequeue(cfs_rq, se);
 
 	/*
@@ -1756,11 +1660,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	update_entity_load_avg(curr, 1);
 	update_cfs_rq_blocked_load(cfs_rq, 1);
 
-	/*
-	 * Update share accounting for long-running entities.
-	 */
-	update_entity_shares_tick(cfs_rq);
-
 #ifdef CONFIG_SCHED_HRTICK
 	/*
 	 * queued ticks are scheduled to match the slice, so don't bother
@@ -2005,18 +1904,9 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 	cfs_rq->throttle_count--;
 #ifdef CONFIG_SMP
 	if (!cfs_rq->throttle_count) {
-		u64 delta = rq->clock_task - cfs_rq->load_stamp;
-
-		/* leaving throttled state, advance shares averaging windows */
-		cfs_rq->load_stamp += delta;
-		cfs_rq->load_last += delta;
-
 		/* adjust cfs_rq_clock_task() */
 		cfs_rq->throttled_clock_task_time += rq->clock_task -
 					     cfs_rq->throttled_clock_task;
-
-		/* update entity weight now that we are on_rq again */
-		update_cfs_shares(cfs_rq);
 	}
 #endif
 
@@ -2028,11 +1918,9 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 	struct rq *rq = data;
 	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
-	/* group is entering throttled state, record last load */
-	if (!cfs_rq->throttle_count) {
-		update_cfs_load(cfs_rq, 0);
+	/* group is entering throttled state, stop time */
+	if (!cfs_rq->throttle_count)
 		cfs_rq->throttled_clock_task = rq->clock_task;
-	}
 	cfs_rq->throttle_count++;
 
 	return 0;
@@ -2630,7 +2518,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 
-		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
 		update_entity_load_avg(se, 1);
 	}
@@ -2692,7 +2579,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 
-		update_cfs_load(cfs_rq, 0);
 		update_cfs_shares(cfs_rq);
 		update_entity_load_avg(se, 1);
 	}
@@ -3755,27 +3641,36 @@ static int move_tasks(struct lb_env *env)
  */
 static int update_shares_cpu(struct task_group *tg, int cpu)
 {
+	struct sched_entity *se;
 	struct cfs_rq *cfs_rq;
 	unsigned long flags;
 	struct rq *rq;
 
-	if (!tg->se[cpu])
-		return 0;
-
 	rq = cpu_rq(cpu);
+	se = tg->se[cpu];
 	cfs_rq = tg->cfs_rq[cpu];
 
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	update_rq_clock(rq);
-	update_cfs_load(cfs_rq, 1);
 	update_cfs_rq_blocked_load(cfs_rq, 1);
 
-	/*
-	 * We need to update shares after updating tg->load_weight in
-	 * order to adjust the weight of groups with long running tasks.
-	 */
-	update_cfs_shares(cfs_rq);
+	if (se) {
+		update_entity_load_avg(se, 1);
+		/*
+		 * We pivot on our runnable average having decayed to zero for
+		 * list removal. This generally implies that all our children
+		 * have also been removed (modulo rounding error or bandwidth
+		 * control); however, such cases are rare and we can fix these
+		 * at enqueue.
+		 *
+		 * TODO: fix up out-of-order children on enqueue.
+		 */
+		if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running)
+			list_del_leaf_cfs_rq(cfs_rq);
+	} else {
+		update_rq_runnable_avg(rq, rq->nr_running);
+	}
 
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
@@ -5702,10 +5597,6 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 
 	cfs_rq->tg = tg;
 	cfs_rq->rq = rq;
-#ifdef CONFIG_SMP
-	/* allow initial update_cfs_load() to truncate */
-	cfs_rq->load_stamp = 1;
-#endif
 	init_cfs_rq_runtime(cfs_rq);
 
 	tg->cfs_rq[cpu] = cfs_rq;
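
The shares computation that consumes calc_tg_weight() is untouched by this
patch (only its closing "return shares;" appears as context above). For
reference, a sketch of calc_cfs_shares() as it stood in kernels of this era,
reconstructed from the surrounding source rather than from this diff, so
treat it as illustrative:

    static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
    {
            long tg_weight, load, shares;

            tg_weight = calc_tg_weight(tg, cfs_rq);
            load = cfs_rq->load.weight;

            /* this cpu's load as a fraction of the group's total shares */
            shares = (tg->shares * load);
            if (tg_weight)
                    shares /= tg_weight;

            /* clamp to [MIN_SHARES, tg->shares] */
            if (shares < MIN_SHARES)
                    shares = MIN_SHARES;
            if (shares > tg->shares)
                    shares = tg->shares;

            return shares;
    }

update_cfs_shares() then feeds the result to reweight_entity() (visible in
the context above), which is how a change in tg->load_avg on one cpu
propagates into the group entity's weight on every other cpu.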

kernel/sched/sched.h

Lines changed: 12 additions & 24 deletions
@@ -234,11 +234,21 @@ struct cfs_rq {
 	u64 runnable_load_avg, blocked_load_avg;
 	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	u32 tg_runnable_contrib;
 	u64 tg_load_contrib;
-#endif
-#endif
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+	/*
+	 * h_load = weight * f(tg)
+	 *
+	 * Where f(tg) is the recursive weight fraction assigned to
+	 * this group.
+	 */
+	unsigned long h_load;
+#endif /* CONFIG_SMP */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
 
@@ -254,28 +264,6 @@ struct cfs_rq {
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
-#ifdef CONFIG_SMP
-	/*
-	 * h_load = weight * f(tg)
-	 *
-	 * Where f(tg) is the recursive weight fraction assigned to
-	 * this group.
-	 */
-	unsigned long h_load;
-
-	/*
-	 * Maintaining per-cpu shares distribution for group scheduling
-	 *
-	 * load_stamp is the last time we updated the load average
-	 * load_last is the last time we updated the load average and saw load
-	 * load_unacc_exec_time is currently unaccounted execution time
-	 */
-	u64 load_avg;
-	u64 load_period;
-	u64 load_stamp, load_last, load_unacc_exec_time;
-
-	unsigned long load_contribution;
-#endif /* CONFIG_SMP */
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
 	u64 runtime_expires;
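
The h_load comment that moves in this hunk describes the quantity the load
balancer uses: a cfs_rq's weight scaled by its group's recursive fraction of
each ancestor's weight. A hypothetical walk of that recurrence (demo only:
the helper name and the +1 divide-by-zero guard are illustrative, not part of
this commit):

    /* h_load = weight * f(tg): scale by our share of each ancestor's weight. */
    static unsigned long demo_h_load(struct cfs_rq *cfs_rq)
    {
            unsigned long load = cfs_rq->load.weight;
            struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

            for (; se; se = se->parent) {
                    struct cfs_rq *parent = cfs_rq_of(se);

                    /* multiply by se's fraction of its parent's total weight */
                    load = load * se->load.weight / (parent->load.weight + 1);
            }

            return load;
    }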
