
Commit c079629

vingu-linaro authored and Ingo Molnar committed
sched/pelt: Move PELT related code in a dedicated file
We want to track rt_rq's utilization as a part of the estimation of the whole rq's utilization. This is necessary because rt tasks can steal utilization from cfs tasks and make them lighter than they are. As we want to use the same load-tracking mechanism for both and prevent useless dependencies between cfs and rt code, the PELT code is moved into a dedicated file.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten.Rasmussen@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: claudio@evidence.eu.com
Cc: daniel.lezcano@linaro.org
Cc: dietmar.eggemann@arm.com
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: patrick.bellasi@arm.com
Cc: quentin.perret@arm.com
Cc: rjw@rjwysocki.net
Cc: valentin.schneider@arm.com
Cc: viresh.kumar@linaro.org
Link: http://lkml.kernel.org/r/1530200714-4504-2-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent 8fe5c5a commit c079629
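
To make the rationale concrete, here is a rough worked example with illustrative numbers (not taken from the patch, and ignoring frequency/CPU invariance scaling). On a CPU of capacity 1024, an rt task that runs about half the time leaves a CPU-bound cfs task able to run only the remaining half, so the cfs task's PELT util_avg converges toward roughly 512 even though its real demand is the whole CPU:

    CPU capacity                       ~ 1024
    rt task runs ~50% of the time      -> rt_rq utilization ~ 512
    cfs task squeezed into the rest    -> cfs util_avg      ~ 512 (under-estimated)
    estimated rq utilization           ~ cfs (512) + rt (512) ~ 1024

Tracking rt_rq's utilization with the same PELT machinery is what lets the rq-level estimate add the two contributions back together.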

File tree

5 files changed: +375, -333 lines changed


kernel/sched/Makefile

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle.o fair.o rt.o deadline.o
 obj-y += wait.o wait_bit.o swait.o completion.o
 
-obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o

kernel/sched/fair.c

Lines changed: 1 addition & 332 deletions
@@ -255,9 +255,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 	return cfs_rq->rq;
 }
 
-/* An entity is a task if it doesn't "own" a runqueue */
-#define entity_is_task(se)	(!se->my_q)
-
 static inline struct task_struct *task_of(struct sched_entity *se)
 {
 	SCHED_WARN_ON(!entity_is_task(se));
@@ -419,7 +416,6 @@ static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 	return container_of(cfs_rq, struct rq, cfs);
 }
 
-#define entity_is_task(se)	1
 
 #define for_each_sched_entity(se) \
 		for (; se; se = NULL)
@@ -692,7 +688,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 #ifdef CONFIG_SMP
-
+#include "pelt.h"
 #include "sched-pelt.h"
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
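
fair.c now pulls in "pelt.h" while the PELT update routines below are removed from it, so the new header presumably declares the entry points fair.c keeps calling. A minimal sketch of that interface, assuming the function names and signatures visible in the removed code (not the verbatim kernel/sched/pelt.h):

	/* Assumed shape of the new kernel/sched/pelt.h interface (sketch only). */
	#ifdef CONFIG_SMP
	int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
	int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq,
				 struct sched_entity *se);
	int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
	#endif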
@@ -2751,19 +2747,6 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 } while (0)
 
 #ifdef CONFIG_SMP
-/*
- * XXX we want to get rid of these helpers and use the full load resolution.
- */
-static inline long se_weight(struct sched_entity *se)
-{
-	return scale_load_down(se->load.weight);
-}
-
-static inline long se_runnable(struct sched_entity *se)
-{
-	return scale_load_down(se->runnable_weight);
-}
-
 static inline void
 enqueue_runnable_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
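
The se_weight()/se_runnable() helpers removed here (relocated along with the rest of the PELT code) only strip the extra fixed-point resolution that load weights carry on 64-bit kernels. A rough worked example, assuming the usual SCHED_FIXEDPOINT_SHIFT of 10 on 64-bit:

    nice-0 task:  se->load.weight = 1024 << 10 = 1048576
                  se_weight(se)   = scale_load_down(1048576) = 1048576 >> 10 = 1024

On 32-bit kernels scale_load_down() is a no-op, so the helpers simply return the raw weight.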
@@ -3064,314 +3047,6 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 }
 
 #ifdef CONFIG_SMP
-/*
- * Approximate:
- *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
- */
-static u64 decay_load(u64 val, u64 n)
-{
-	unsigned int local_n;
-
-	if (unlikely(n > LOAD_AVG_PERIOD * 63))
-		return 0;
-
-	/* after bounds checking we can collapse to 32-bit */
-	local_n = n;
-
-	/*
-	 * As y^PERIOD = 1/2, we can combine
-	 *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
-	 * With a look-up table which covers y^n (n<PERIOD)
-	 *
-	 * To achieve constant time decay_load.
-	 */
-	if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
-		val >>= local_n / LOAD_AVG_PERIOD;
-		local_n %= LOAD_AVG_PERIOD;
-	}
-
-	val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
-	return val;
-}
-
-static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
-{
-	u32 c1, c2, c3 = d3; /* y^0 == 1 */
-
-	/*
-	 * c1 = d1 y^p
-	 */
-	c1 = decay_load((u64)d1, periods);
-
-	/*
-	 *            p-1
-	 * c2 = 1024 \Sum y^n
-	 *            n=1
-	 *
-	 *              inf        inf
-	 *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
-	 *              n=0        n=p
-	 */
-	c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;
-
-	return c1 + c2 + c3;
-}
-
-/*
- * Accumulate the three separate parts of the sum; d1 the remainder
- * of the last (incomplete) period, d2 the span of full periods and d3
- * the remainder of the (incomplete) current period.
- *
- *           d1          d2           d3
- *           ^           ^            ^
- *           |           |            |
- *         |<->|<----------------->|<--->|
- * ... |---x---|------| ... |------|-----x (now)
- *
- *                           p-1
- * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
- *                           n=1
- *
- *    = u y^p +					(Step 1)
- *
- *                     p-1
- *      d1 y^p + 1024 \Sum y^n + d3 y^0		(Step 2)
- *                     n=1
- */
-static __always_inline u32
-accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
-	       unsigned long load, unsigned long runnable, int running)
-{
-	unsigned long scale_freq, scale_cpu;
-	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
-	u64 periods;
-
-	scale_freq = arch_scale_freq_capacity(cpu);
-	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
-	delta += sa->period_contrib;
-	periods = delta / 1024; /* A period is 1024us (~1ms) */
-
-	/*
-	 * Step 1: decay old *_sum if we crossed period boundaries.
-	 */
-	if (periods) {
-		sa->load_sum = decay_load(sa->load_sum, periods);
-		sa->runnable_load_sum =
-			decay_load(sa->runnable_load_sum, periods);
-		sa->util_sum = decay_load((u64)(sa->util_sum), periods);
-
-		/*
-		 * Step 2
-		 */
-		delta %= 1024;
-		contrib = __accumulate_pelt_segments(periods,
-				1024 - sa->period_contrib, delta);
-	}
-	sa->period_contrib = delta;
-
-	contrib = cap_scale(contrib, scale_freq);
-	if (load)
-		sa->load_sum += load * contrib;
-	if (runnable)
-		sa->runnable_load_sum += runnable * contrib;
-	if (running)
-		sa->util_sum += contrib * scale_cpu;
-
-	return periods;
-}
-
-/*
- * We can represent the historical contribution to runnable average as the
- * coefficients of a geometric series.  To do this we sub-divide our runnable
- * history into segments of approximately 1ms (1024us); label the segment that
- * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
- *
- * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
- *      p0            p1           p2
- *     (now)       (~1ms ago)  (~2ms ago)
- *
- * Let u_i denote the fraction of p_i that the entity was runnable.
- *
- * We then designate the fractions u_i as our co-efficients, yielding the
- * following representation of historical load:
- *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
- *
- * We choose y based on the with of a reasonably scheduling period, fixing:
- *   y^32 = 0.5
- *
- * This means that the contribution to load ~32ms ago (u_32) will be weighted
- * approximately half as much as the contribution to load within the last ms
- * (u_0).
- *
- * When a period "rolls over" and we have new u_0`, multiplying the previous
- * sum again by y is sufficient to update:
- *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
- *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
- */
-static __always_inline int
-___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
-		  unsigned long load, unsigned long runnable, int running)
-{
-	u64 delta;
-
-	delta = now - sa->last_update_time;
-	/*
-	 * This should only happen when time goes backwards, which it
-	 * unfortunately does during sched clock init when we swap over to TSC.
-	 */
-	if ((s64)delta < 0) {
-		sa->last_update_time = now;
-		return 0;
-	}
-
-	/*
-	 * Use 1024ns as the unit of measurement since it's a reasonable
-	 * approximation of 1us and fast to compute.
-	 */
-	delta >>= 10;
-	if (!delta)
-		return 0;
-
-	sa->last_update_time += delta << 10;
-
-	/*
-	 * running is a subset of runnable (weight) so running can't be set if
-	 * runnable is clear. But there are some corner cases where the current
-	 * se has been already dequeued but cfs_rq->curr still points to it.
-	 * This means that weight will be 0 but not running for a sched_entity
-	 * but also for a cfs_rq if the latter becomes idle. As an example,
-	 * this happens during idle_balance() which calls
-	 * update_blocked_averages()
-	 */
-	if (!load)
-		runnable = running = 0;
-
-	/*
-	 * Now we know we crossed measurement unit boundaries. The *_avg
-	 * accrues by two steps:
-	 *
-	 * Step 1: accumulate *_sum since last_update_time. If we haven't
-	 * crossed period boundaries, finish.
-	 */
-	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
-		return 0;
-
-	return 1;
-}
-
-static __always_inline void
-___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
-{
-	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
-
-	/*
-	 * Step 2: update *_avg.
-	 */
-	sa->load_avg = div_u64(load * sa->load_sum, divider);
-	sa->runnable_load_avg =	div_u64(runnable * sa->runnable_load_sum, divider);
-	sa->util_avg = sa->util_sum / divider;
-}
-
-/*
- * When a task is dequeued, its estimated utilization should not be update if
- * its util_avg has not been updated at least once.
- * This flag is used to synchronize util_avg updates with util_est updates.
- * We map this information into the LSB bit of the utilization saved at
- * dequeue time (i.e. util_est.dequeued).
- */
-#define UTIL_AVG_UNCHANGED 0x1
-
-static inline void cfs_se_util_change(struct sched_avg *avg)
-{
-	unsigned int enqueued;
-
-	if (!sched_feat(UTIL_EST))
-		return;
-
-	/* Avoid store if the flag has been already set */
-	enqueued = avg->util_est.enqueued;
-	if (!(enqueued & UTIL_AVG_UNCHANGED))
-		return;
-
-	/* Reset flag to report util_avg has been updated */
-	enqueued &= ~UTIL_AVG_UNCHANGED;
-	WRITE_ONCE(avg->util_est.enqueued, enqueued);
-}
-
-/*
- * sched_entity:
- *
- *   task:
- *     se_runnable() == se_weight()
- *
- *   group: [ see update_cfs_group() ]
- *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
- *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
- *
- *   load_sum := runnable_sum
- *   load_avg = se_weight(se) * runnable_avg
- *
- *   runnable_load_sum := runnable_sum
- *   runnable_load_avg = se_runnable(se) * runnable_avg
- *
- * XXX collapse load_sum and runnable_load_sum
- *
- * cfq_rs:
- *
- *   load_sum = \Sum se_weight(se) * se->avg.load_sum
- *   load_avg = \Sum se->avg.load_avg
- *
- *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
- *   runnable_load_avg = \Sum se->avg.runable_load_avg
- */
-
-static int
-__update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
-{
-	if (entity_is_task(se))
-		se->runnable_weight = se->load.weight;
-
-	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
-		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
-		return 1;
-	}
-
-	return 0;
-}
-
-static int
-__update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	if (entity_is_task(se))
-		se->runnable_weight = se->load.weight;
-
-	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
-				cfs_rq->curr == se)) {
-
-		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
-		cfs_se_util_change(&se->avg);
-		return 1;
-	}
-
-	return 0;
-}
-
-static int
-__update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
-{
-	if (___update_load_sum(now, cpu, &cfs_rq->avg,
-				scale_load_down(cfs_rq->load.weight),
-				scale_load_down(cfs_rq->runnable_weight),
-				cfs_rq->curr != NULL)) {
-
-		___update_load_avg(&cfs_rq->avg, 1, 1);
-		return 1;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /**
  * update_tg_load_avg - update the tg's load avg
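
The bulk of what moves is the core PELT arithmetic above. As a reading aid, the following self-contained user-space sketch reproduces the math the comments describe in plain floating point: y is fixed by y^32 = 0.5, a fully-runnable signal saturates near LOAD_AVG_MAX (47742 in the kernel's fixed-point tables), and an update that crosses p period boundaries follows u' = (u + d1)*y^p + 1024*\Sum_{n=1..p-1} y^n + d3. This is an illustration of the math only, not the kernel's lookup-table/mul_u64_u32_shr() implementation; the constant 345 (periods to saturation) is an assumption matching LOAD_AVG_MAX_N.

/* pelt_sketch.c: floating-point illustration of the PELT sums moved to pelt.c.
 * Build: cc pelt_sketch.c -lm. Illustrative only; the kernel uses a 32-entry
 * inverse-y lookup table and mul_u64_u32_shr() instead of pow()/double. */
#include <math.h>
#include <stdio.h>

#define LOAD_AVG_PERIOD	32	/* y^32 == 0.5: a ~32ms half-life */

/* decay(): val * y^n, the job done by decay_load()/runnable_avg_yN_inv[] */
static double decay(double val, unsigned int n)
{
	return val * pow(0.5, (double)n / LOAD_AVG_PERIOD);
}

int main(void)
{
	const double y = pow(0.5, 1.0 / LOAD_AVG_PERIOD);
	double sum = 0.0;
	double u = 20000.0, d1 = 300.0, d3 = 200.0, c2 = 0.0;
	unsigned int p = 4;
	int n;

	/* A signal runnable in every 1024us segment saturates geometrically;
	 * 345 periods roughly matches the kernel's LOAD_AVG_MAX_N. The float
	 * result lands slightly above 47742 because the kernel truncates at
	 * each fixed-point step. */
	for (n = 0; n < 345; n++)
		sum = sum * y + 1024.0;
	printf("saturated sum   ~= %.0f (kernel LOAD_AVG_MAX = 47742)\n", sum);

	/* Half-life: a contribution decays to half after 32 periods (~32ms). */
	printf("decay(1024, 32)  = %.1f\n", decay(1024.0, 32));

	/* Three-segment update across p full period boundaries:
	 * u' = (u + d1) * y^p + 1024 * \Sum_{n=1}^{p-1} y^n + d3 * y^0 */
	for (n = 1; n <= (int)p - 1; n++)
		c2 += 1024.0 * pow(y, n);
	printf("u'               = %.1f\n", (u + d1) * pow(y, p) + c2 + d3);

	return 0;
}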
@@ -4039,12 +3714,6 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 
 #else /* CONFIG_SMP */
 
-static inline int
-update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
-{
-	return 0;
-}
-
 #define UPDATE_TG	0x0
 #define SKIP_AGE_LOAD	0x0
 #define DO_ATTACH	0x0
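
One detail of the code moved above that is easy to miss is the UTIL_AVG_UNCHANGED flag: it is parked in the LSB of util_est.enqueued, so "util_avg has not changed since dequeue" costs no extra storage and only one bit of utilization resolution. A toy user-space model of that handshake (hypothetical helper names; only the flag handling mirrors cfs_se_util_change() above):

/* toy_util_est.c: minimal model of the UTIL_AVG_UNCHANGED LSB flag. */
#include <stdio.h>

#define UTIL_AVG_UNCHANGED	0x1	/* same value as in the code above */

struct toy_util_est {
	unsigned int enqueued;
};

/* Dequeue path: snapshot the utilization and tag it as "util_avg not yet
 * updated since this snapshot" by setting the LSB. */
static void toy_dequeue(struct toy_util_est *ue, unsigned int util)
{
	ue->enqueued = util | UTIL_AVG_UNCHANGED;
}

/* PELT update path: the first util_avg update clears the flag, which is
 * what cfs_se_util_change() does for the real sched_avg. */
static void toy_util_avg_updated(struct toy_util_est *ue)
{
	if (ue->enqueued & UTIL_AVG_UNCHANGED)
		ue->enqueued &= ~UTIL_AVG_UNCHANGED;
}

int main(void)
{
	struct toy_util_est ue;

	toy_dequeue(&ue, 400);
	printf("after dequeue:     enqueued=%u flag=%u\n",
	       ue.enqueued, ue.enqueued & UTIL_AVG_UNCHANGED);
	toy_util_avg_updated(&ue);
	printf("after PELT update: enqueued=%u flag=%u\n",
	       ue.enqueued, ue.enqueued & UTIL_AVG_UNCHANGED);
	return 0;
}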
