
Commit 3d30544

Peter Zijlstra authored and Ingo Molnar committed
sched/fair: Apply more PELT fixes
One additional 'rule' for using update_cfs_rq_load_avg() is that one
should call update_tg_load_avg() if it returns true.

Add a bunch of comments to hopefully clarify some of the rules:

 o You need to update the cfs_rq _before_ any entity attach/detach. This is
   important because, while it isn't strictly needed for mathematical
   consistency, it is required for the physical interpretation of the model:
   you attach/detach _now_.

 o When you modify the cfs_rq avg, you then have to call update_tg_load_avg()
   in order to propagate the changes upwards.

 o (Fair) entities are always attached; switched_{to,from}_fair() deal with
   !fair. This follows directly from the definition of the cfs_rq averages,
   namely that they are a direct sum of all (runnable or blocked) entities
   on that rq.

It is the second rule that this patch enforces, but it adds comments
pertaining to all of them.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
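
Read as a calling pattern, the rules above are what each hunk in the diff below applies at its attach/detach site. The following condensed sketch is illustrative only: the helpers it calls (cfs_rq_clock_task(), update_cfs_rq_load_avg(), attach_entity_load_avg(), update_tg_load_avg()) are the kernel functions touched by this patch, while the wrapper sync_and_attach() is a hypothetical name used here purely to frame the pattern.

	/* Hypothetical wrapper -- a condensed sketch of the pattern, not code from the patch. */
	static void sync_and_attach(struct cfs_rq *cfs_rq, struct sched_entity *se)
	{
		u64 now = cfs_rq_clock_task(cfs_rq);
		int tg_update;

		/* Rule 1: bring the cfs_rq averages up to 'now' before touching them. */
		tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);

		/* The entity is attached _now_, against the freshly updated averages. */
		attach_entity_load_avg(cfs_rq, se);

		/* Rule 2: propagate upwards, after the cfs_rq avg has been modified. */
		if (tg_update)
			update_tg_load_avg(cfs_rq, false);
	}

detach_task_cfs_rq() follows the same shape with detach_entity_load_avg() in place of the attach; update_load_avg() (visible in the hunk context below) already pairs the two calls, so only the open-coded call sites needed this treatment.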
1 parent 7dc603c commit 3d30544

File tree: 1 file changed, +49 -4 lines


kernel/sched/fair.c

Lines changed: 49 additions & 4 deletions
@@ -692,6 +692,7 @@ void init_entity_runnable_average(struct sched_entity *se)
 
 static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 static int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq);
+static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force);
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 /*
@@ -725,6 +726,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
 	struct sched_avg *sa = &se->avg;
 	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
 	u64 now = cfs_rq_clock_task(cfs_rq);
+	int tg_update;
 
 	if (cap > 0) {
 		if (cfs_rq->avg.util_avg != 0) {
@@ -757,8 +759,10 @@ void post_init_entity_util_avg(struct sched_entity *se)
 		}
 	}
 
-	update_cfs_rq_load_avg(now, cfs_rq, false);
+	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
+	if (tg_update)
+		update_tg_load_avg(cfs_rq, false);
 }
 
 #else /* !CONFIG_SMP */
@@ -768,6 +772,9 @@ void init_entity_runnable_average(struct sched_entity *se)
 void post_init_entity_util_avg(struct sched_entity *se)
 {
 }
+static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
+{
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -2912,7 +2919,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 	WRITE_ONCE(*ptr, res); \
 } while (0)
 
-/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
+/**
+ * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
+ * @now: current time, as per cfs_rq_clock_task()
+ * @cfs_rq: cfs_rq to update
+ * @update_freq: should we call cfs_rq_util_change() or will the call do so
+ *
+ * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
+ * avg. The immediate corollary is that all (fair) tasks must be attached, see
+ * post_init_entity_util_avg().
+ *
+ * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
+ *
+ * Returns true if the load decayed or we removed utilization. It is expected
+ * that one calls update_tg_load_avg() on this condition, but after you've
+ * modified the cfs_rq avg (attach/detach), such that we propagate the new
+ * avg up.
+ */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 {
@@ -2967,6 +2990,14 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
 		update_tg_load_avg(cfs_rq, 0);
 }
 
+/**
+ * attach_entity_load_avg - attach this entity to its cfs_rq load avg
+ * @cfs_rq: cfs_rq to attach to
+ * @se: sched_entity to attach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
 static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (!sched_feat(ATTACH_AGE_LOAD))
@@ -2998,6 +3029,14 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	cfs_rq_util_change(cfs_rq);
 }
 
+/**
+ * detach_entity_load_avg - detach this entity from its cfs_rq load avg
+ * @cfs_rq: cfs_rq to detach from
+ * @se: sched_entity to detach
+ *
+ * Must call update_cfs_rq_load_avg() before this, since we rely on
+ * cfs_rq->avg.last_update_time being current.
+ */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	__update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq_of(cfs_rq)),
@@ -8404,6 +8443,7 @@ static void detach_task_cfs_rq(struct task_struct *p)
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
+	int tg_update;
 
 	if (!vruntime_normalized(p)) {
 		/*
@@ -8415,15 +8455,18 @@ static void detach_task_cfs_rq(struct task_struct *p)
 	}
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
-	update_cfs_rq_load_avg(now, cfs_rq, false);
+	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
 	detach_entity_load_avg(cfs_rq, se);
+	if (tg_update)
+		update_tg_load_avg(cfs_rq, false);
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
+	int tg_update;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
@@ -8434,8 +8477,10 @@ static void attach_task_cfs_rq(struct task_struct *p)
 #endif
 
 	/* Synchronize task with its cfs_rq */
-	update_cfs_rq_load_avg(now, cfs_rq, false);
+	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
+	if (tg_update)
+		update_tg_load_avg(cfs_rq, false);
 
 	if (!vruntime_normalized(p))
 		se->vruntime += cfs_rq->min_vruntime;
