Skip to content

Commit f1b1728

Browse files
paulturner authored and Ingo Molnar committed
sched: Maintain runnable averages across throttled periods
With bandwidth control tracked entities may cease execution according to user specified bandwidth limits. Charging this time as either throttled or blocked however, is incorrect and would falsely skew in either direction. What we actually want is for any throttled periods to be "invisible" to load-tracking as they are removed from the system for that interval and contribute normally otherwise. Do this by moderating the progression of time to omit any periods in which the entity belonged to a throttled hierarchy. Signed-off-by: Paul Turner <pjt@google.com> Reviewed-by: Ben Segall <bsegall@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20120823141506.998912151@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent bb17f65 commit f1b1728

File tree

2 files changed

+42
-11
lines changed

2 files changed

+42
-11
lines changed

kernel/sched/fair.c

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,15 +1222,26 @@ static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
12221222
cfs_rq->blocked_load_avg = 0;
12231223
}
12241224

1225+
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
1226+
12251227
/* Update a sched_entity's runnable average */
12261228
static inline void update_entity_load_avg(struct sched_entity *se,
12271229
int update_cfs_rq)
12281230
{
12291231
struct cfs_rq *cfs_rq = cfs_rq_of(se);
12301232
long contrib_delta;
1233+
u64 now;
12311234

1232-
if (!__update_entity_runnable_avg(rq_of(cfs_rq)->clock_task, &se->avg,
1233-
se->on_rq))
1235+
/*
1236+
* For a group entity we need to use their owned cfs_rq_clock_task() in
1237+
* case they are the parent of a throttled hierarchy.
1238+
*/
1239+
if (entity_is_task(se))
1240+
now = cfs_rq_clock_task(cfs_rq);
1241+
else
1242+
now = cfs_rq_clock_task(group_cfs_rq(se));
1243+
1244+
if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
12341245
return;
12351246

12361247
contrib_delta = __update_entity_load_avg_contrib(se);
@@ -1250,7 +1261,7 @@ static inline void update_entity_load_avg(struct sched_entity *se,
12501261
*/
12511262
static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
12521263
{
1253-
u64 now = rq_of(cfs_rq)->clock_task >> 20;
1264+
u64 now = cfs_rq_clock_task(cfs_rq) >> 20;
12541265
u64 decays;
12551266

12561267
decays = now - cfs_rq->last_decay;
@@ -1841,6 +1852,15 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
18411852
return &tg->cfs_bandwidth;
18421853
}
18431854

1855+
/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
1856+
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
1857+
{
1858+
if (unlikely(cfs_rq->throttle_count))
1859+
return cfs_rq->throttled_clock_task;
1860+
1861+
return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time;
1862+
}
1863+
18441864
/* returns 0 on failure to allocate runtime */
18451865
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
18461866
{
@@ -1991,6 +2011,10 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
19912011
cfs_rq->load_stamp += delta;
19922012
cfs_rq->load_last += delta;
19932013

2014+
/* adjust cfs_rq_clock_task() */
2015+
cfs_rq->throttled_clock_task_time += rq->clock_task -
2016+
cfs_rq->throttled_clock_task;
2017+
19942018
/* update entity weight now that we are on_rq again */
19952019
update_cfs_shares(cfs_rq);
19962020
}
@@ -2005,8 +2029,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
20052029
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
20062030

20072031
/* group is entering throttled state, record last load */
2008-
if (!cfs_rq->throttle_count)
2032+
if (!cfs_rq->throttle_count) {
20092033
update_cfs_load(cfs_rq, 0);
2034+
cfs_rq->throttled_clock_task = rq->clock_task;
2035+
}
20102036
cfs_rq->throttle_count++;
20112037

20122038
return 0;
@@ -2021,7 +2047,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
20212047

20222048
se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
20232049

2024-
/* account load preceding throttle */
2050+
/* freeze hierarchy runnable averages while throttled */
20252051
rcu_read_lock();
20262052
walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
20272053
rcu_read_unlock();
@@ -2045,7 +2071,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
20452071
rq->nr_running -= task_delta;
20462072

20472073
cfs_rq->throttled = 1;
2048-
cfs_rq->throttled_timestamp = rq->clock;
2074+
cfs_rq->throttled_clock = rq->clock;
20492075
raw_spin_lock(&cfs_b->lock);
20502076
list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
20512077
raw_spin_unlock(&cfs_b->lock);
@@ -2063,10 +2089,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
20632089

20642090
cfs_rq->throttled = 0;
20652091
raw_spin_lock(&cfs_b->lock);
2066-
cfs_b->throttled_time += rq->clock - cfs_rq->throttled_timestamp;
2092+
cfs_b->throttled_time += rq->clock - cfs_rq->throttled_clock;
20672093
list_del_rcu(&cfs_rq->throttled_list);
20682094
raw_spin_unlock(&cfs_b->lock);
2069-
cfs_rq->throttled_timestamp = 0;
20702095

20712096
update_rq_clock(rq);
20722097
/* update hierarchical throttle state */
@@ -2466,8 +2491,13 @@ static void unthrottle_offline_cfs_rqs(struct rq *rq)
24662491
}
24672492

24682493
#else /* CONFIG_CFS_BANDWIDTH */
2469-
static __always_inline
2470-
void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
2494+
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
2495+
{
2496+
return rq_of(cfs_rq)->clock_task;
2497+
}
2498+
2499+
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
2500+
unsigned long delta_exec) {}
24712501
static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
24722502
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
24732503
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}

kernel/sched/sched.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ struct cfs_rq {
281281
u64 runtime_expires;
282282
s64 runtime_remaining;
283283

284-
u64 throttled_timestamp;
284+
u64 throttled_clock, throttled_clock_task;
285+
u64 throttled_clock_task_time;
285286
int throttled, throttle_count;
286287
struct list_head throttled_list;
287288
#endif /* CONFIG_CFS_BANDWIDTH */

0 commit comments

Comments
 (0)