Skip to content

Commit 34e2c55

Browse files
committed
cpufreq: Add mechanism for registering utilization update callbacks
Introduce a mechanism by which parts of the cpufreq subsystem ("setpolicy" drivers or the core) can register callbacks to be executed from cpufreq_update_util(), which is invoked by the scheduler's update_load_avg() on CPU utilization changes.

This allows the "setpolicy" drivers to dispense with their timers and do all of the computations they need and frequency/voltage adjustments in the update_load_avg() code path, among other things.

The update_load_avg() changes were suggested by Peter Zijlstra.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
1 parent de1df26 commit 34e2c55

File tree

6 files changed

+113
-1
lines changed

6 files changed

+113
-1
lines changed

drivers/cpufreq/cpufreq.c

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,51 @@ static LIST_HEAD(cpufreq_governor_list);
102102
static struct cpufreq_driver *cpufreq_driver;
103103
static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
104104
static DEFINE_RWLOCK(cpufreq_driver_lock);
105+
106+
static DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
107+
108+
/**
109+
* cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
110+
* @cpu: The CPU to set the pointer for.
111+
* @data: New pointer value.
112+
*
113+
* Set and publish the update_util_data pointer for the given CPU. That pointer
114+
* points to a struct update_util_data object containing a callback function
115+
* to call from cpufreq_update_util(). That function will be called from an RCU
116+
* read-side critical section, so it must not sleep.
117+
*
118+
* Callers must use RCU callbacks to free any memory that might be accessed
119+
* via the old update_util_data pointer or invoke synchronize_rcu() right after
120+
* this function to avoid use-after-free.
121+
*/
122+
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
123+
{
124+
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
125+
}
126+
EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);
127+
128+
/**
129+
* cpufreq_update_util - Take a note about CPU utilization changes.
130+
* @time: Current time.
131+
* @util: Current utilization.
132+
* @max: Utilization ceiling.
133+
*
134+
* This function is called by the scheduler on every invocation of
135+
* update_load_avg() on the CPU whose utilization is being updated.
136+
*/
137+
void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
138+
{
139+
struct update_util_data *data;
140+
141+
rcu_read_lock();
142+
143+
data = rcu_dereference(*this_cpu_ptr(&cpufreq_update_util_data));
144+
if (data && data->func)
145+
data->func(data, time, util, max);
146+
147+
rcu_read_unlock();
148+
}
149+
105150
DEFINE_MUTEX(cpufreq_governor_lock);
106151

107152
/* Flag to suspend/resume CPUFreq governors */

include/linux/cpufreq.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,36 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy)
151151
extern struct kobject *cpufreq_global_kobject;
152152

153153
#ifdef CONFIG_CPU_FREQ
154+
void cpufreq_update_util(u64 time, unsigned long util, unsigned long max);
155+
156+
/**
157+
* cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
158+
* @time: Current time.
159+
*
160+
* The way cpufreq is currently arranged requires it to evaluate the CPU
161+
* performance state (frequency/voltage) on a regular basis to prevent it from
162+
* being stuck in a completely inadequate performance level for too long.
163+
* That is not guaranteed to happen if the updates are only triggered from CFS,
164+
* though, because they may not be coming in if RT or deadline tasks are active
165+
* all the time (or there are RT and DL tasks only).
166+
*
167+
* As a workaround for that issue, this function is called by the RT and DL
168+
* sched classes to trigger extra cpufreq updates to prevent it from stalling,
169+
* but that really is a band-aid. Going forward it should be replaced with
170+
* solutions targeted more specifically at RT and DL tasks.
171+
*/
172+
static inline void cpufreq_trigger_update(u64 time)
173+
{
174+
cpufreq_update_util(time, ULONG_MAX, 0);
175+
}
176+
177+
struct update_util_data {
178+
void (*func)(struct update_util_data *data,
179+
u64 time, unsigned long util, unsigned long max);
180+
};
181+
182+
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data);
183+
154184
unsigned int cpufreq_get(unsigned int cpu);
155185
unsigned int cpufreq_quick_get(unsigned int cpu);
156186
unsigned int cpufreq_quick_get_max(unsigned int cpu);
@@ -162,6 +192,10 @@ int cpufreq_update_policy(unsigned int cpu);
162192
bool have_governor_per_policy(void);
163193
struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy);
164194
#else
195+
static inline void cpufreq_update_util(u64 time, unsigned long util,
196+
unsigned long max) {}
197+
static inline void cpufreq_trigger_update(u64 time) {}
198+
165199
static inline unsigned int cpufreq_get(unsigned int cpu)
166200
{
167201
return 0;

kernel/sched/deadline.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
726726
if (!dl_task(curr) || !on_dl_rq(dl_se))
727727
return;
728728

729+
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
730+
if (cpu_of(rq) == smp_processor_id())
731+
cpufreq_trigger_update(rq_clock(rq));
732+
729733
/*
730734
* Consumed budget is computed considering the time as
731735
* observed by schedulable tasks (excluding time spent

kernel/sched/fair.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
28242824
{
28252825
struct cfs_rq *cfs_rq = cfs_rq_of(se);
28262826
u64 now = cfs_rq_clock_task(cfs_rq);
2827-
int cpu = cpu_of(rq_of(cfs_rq));
2827+
struct rq *rq = rq_of(cfs_rq);
2828+
int cpu = cpu_of(rq);
28282829

28292830
/*
28302831
* Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
28362837

28372838
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
28382839
update_tg_load_avg(cfs_rq, 0);
2840+
2841+
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
2842+
unsigned long max = rq->cpu_capacity_orig;
2843+
2844+
/*
2845+
* There are a few boundary cases this might miss but it should
2846+
* get called often enough that this should (hopefully) not be
2847+
* a real problem -- added to that it only calls on the local
2848+
* CPU, so if we enqueue remotely we'll miss an update, but
2849+
* the next tick/schedule should update.
2850+
*
2851+
* It will not get called when we go idle, because the idle
2852+
* thread is a different class (!fair), nor will the utilization
2853+
* number include things like RT tasks.
2854+
*
2855+
* As is, the util number is not freq-invariant (we'd have to
2856+
* implement arch_scale_freq_capacity() for that).
2857+
*
2858+
* See cpu_util().
2859+
*/
2860+
cpufreq_update_util(rq_clock(rq),
2861+
min(cfs_rq->avg.util_avg, max), max);
2862+
}
28392863
}
28402864

28412865
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)

kernel/sched/rt.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
945945
if (curr->sched_class != &rt_sched_class)
946946
return;
947947

948+
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
949+
if (cpu_of(rq) == smp_processor_id())
950+
cpufreq_trigger_update(rq_clock(rq));
951+
948952
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
949953
if (unlikely((s64)delta_exec <= 0))
950954
return;

kernel/sched/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/irq_work.h>
1010
#include <linux/tick.h>
1111
#include <linux/slab.h>
12+
#include <linux/cpufreq.h>
1213

1314
#include "cpupri.h"
1415
#include "cpudeadline.h"

0 commit comments

Comments
 (0)