Skip to content

Commit a4675fb

Browse files
committed
cpufreq: intel_pstate: Replace timers with utilization update callbacks
Instead of using a per-CPU deferrable timer for utilization sampling and P-states adjustments, register a utilization update callback that will be invoked from the scheduler on utilization changes. The sampling rate is still the same as what was used for the deferrable timers, so the functional impact of this patch should not be significant. Based on an earlier patch from Srinivas Pandruvada. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
1 parent 34e2c55 commit a4675fb

File tree

1 file changed

+39
-64
lines changed

1 file changed

+39
-64
lines changed

drivers/cpufreq/intel_pstate.c

Lines changed: 39 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ struct sample {
7171
u64 mperf;
7272
u64 tsc;
7373
int freq;
74-
ktime_t time;
74+
u64 time;
7575
};
7676

7777
struct pstate_data {
@@ -103,13 +103,13 @@ struct _pid {
103103
struct cpudata {
104104
int cpu;
105105

106-
struct timer_list timer;
106+
struct update_util_data update_util;
107107

108108
struct pstate_data pstate;
109109
struct vid_data vid;
110110
struct _pid pid;
111111

112-
ktime_t last_sample_time;
112+
u64 last_sample_time;
113113
u64 prev_aperf;
114114
u64 prev_mperf;
115115
u64 prev_tsc;
@@ -120,6 +120,7 @@ struct cpudata {
120120
static struct cpudata **all_cpu_data;
121121
struct pstate_adjust_policy {
122122
int sample_rate_ms;
123+
s64 sample_rate_ns;
123124
int deadband;
124125
int setpoint;
125126
int p_gain_pct;
@@ -712,7 +713,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate)
712713
if (limits->no_turbo && !limits->turbo_disabled)
713714
val |= (u64)1 << 32;
714715

715-
wrmsrl_on_cpu(cpudata->cpu, MSR_IA32_PERF_CTL, val);
716+
wrmsrl(MSR_IA32_PERF_CTL, val);
716717
}
717718

718719
static int knl_get_turbo_pstate(void)
@@ -883,7 +884,7 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
883884
sample->core_pct_busy = (int32_t)core_pct;
884885
}
885886

886-
static inline void intel_pstate_sample(struct cpudata *cpu)
887+
static inline void intel_pstate_sample(struct cpudata *cpu, u64 time)
887888
{
888889
u64 aperf, mperf;
889890
unsigned long flags;
@@ -900,7 +901,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
900901
local_irq_restore(flags);
901902

902903
cpu->last_sample_time = cpu->sample.time;
903-
cpu->sample.time = ktime_get();
904+
cpu->sample.time = time;
904905
cpu->sample.aperf = aperf;
905906
cpu->sample.mperf = mperf;
906907
cpu->sample.tsc = tsc;
@@ -915,22 +916,6 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
915916
cpu->prev_tsc = tsc;
916917
}
917918

918-
static inline void intel_hwp_set_sample_time(struct cpudata *cpu)
919-
{
920-
int delay;
921-
922-
delay = msecs_to_jiffies(50);
923-
mod_timer_pinned(&cpu->timer, jiffies + delay);
924-
}
925-
926-
static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
927-
{
928-
int delay;
929-
930-
delay = msecs_to_jiffies(pid_params.sample_rate_ms);
931-
mod_timer_pinned(&cpu->timer, jiffies + delay);
932-
}
933-
934919
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
935920
{
936921
struct sample *sample = &cpu->sample;
@@ -970,8 +955,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
970955
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
971956
{
972957
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
973-
s64 duration_us;
974-
u32 sample_time;
958+
u64 duration_ns;
975959

976960
/*
977961
* core_busy is the ratio of actual performance to max
@@ -990,18 +974,16 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
990974
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
991975

992976
/*
993-
* Since we have a deferred timer, it will not fire unless
994-
* we are in C0. So, determine if the actual elapsed time
995-
* is significantly greater (3x) than our sample interval. If it
996-
* is, then we were idle for a long enough period of time
997-
* to adjust our busyness.
977+
* Since our utilization update callback will not run unless we are
978+
* in C0, check if the actual elapsed time is significantly greater (3x)
979+
* than our sample interval. If it is, then we were idle for a long
980+
* enough period of time to adjust our busyness.
998981
*/
999-
sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
1000-
duration_us = ktime_us_delta(cpu->sample.time,
1001-
cpu->last_sample_time);
1002-
if (duration_us > sample_time * 3) {
1003-
sample_ratio = div_fp(int_tofp(sample_time),
1004-
int_tofp(duration_us));
982+
duration_ns = cpu->sample.time - cpu->last_sample_time;
983+
if ((s64)duration_ns > pid_params.sample_rate_ns * 3
984+
&& cpu->last_sample_time > 0) {
985+
sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
986+
int_tofp(duration_ns));
1005987
core_busy = mul_fp(core_busy, sample_ratio);
1006988
}
1007989

@@ -1031,23 +1013,17 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
10311013
sample->freq);
10321014
}
10331015

1034-
static void intel_hwp_timer_func(unsigned long __data)
1035-
{
1036-
struct cpudata *cpu = (struct cpudata *) __data;
1037-
1038-
intel_pstate_sample(cpu);
1039-
intel_hwp_set_sample_time(cpu);
1040-
}
1041-
1042-
static void intel_pstate_timer_func(unsigned long __data)
1016+
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
1017+
unsigned long util, unsigned long max)
10431018
{
1044-
struct cpudata *cpu = (struct cpudata *) __data;
1045-
1046-
intel_pstate_sample(cpu);
1019+
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1020+
u64 delta_ns = time - cpu->sample.time;
10471021

1048-
intel_pstate_adjust_busy_pstate(cpu);
1049-
1050-
intel_pstate_set_sample_time(cpu);
1022+
if ((s64)delta_ns >= pid_params.sample_rate_ns) {
1023+
intel_pstate_sample(cpu, time);
1024+
if (!hwp_active)
1025+
intel_pstate_adjust_busy_pstate(cpu);
1026+
}
10511027
}
10521028

10531029
#define ICPU(model, policy) \
@@ -1095,24 +1071,19 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
10951071

10961072
cpu->cpu = cpunum;
10971073

1098-
if (hwp_active)
1074+
if (hwp_active) {
10991075
intel_pstate_hwp_enable(cpu);
1076+
pid_params.sample_rate_ms = 50;
1077+
pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
1078+
}
11001079

11011080
intel_pstate_get_cpu_pstates(cpu);
11021081

1103-
init_timer_deferrable(&cpu->timer);
1104-
cpu->timer.data = (unsigned long)cpu;
1105-
cpu->timer.expires = jiffies + HZ/100;
1106-
1107-
if (!hwp_active)
1108-
cpu->timer.function = intel_pstate_timer_func;
1109-
else
1110-
cpu->timer.function = intel_hwp_timer_func;
1111-
11121082
intel_pstate_busy_pid_reset(cpu);
1113-
intel_pstate_sample(cpu);
1083+
intel_pstate_sample(cpu, 0);
11141084

1115-
add_timer_on(&cpu->timer, cpunum);
1085+
cpu->update_util.func = intel_pstate_update_util;
1086+
cpufreq_set_update_util_data(cpunum, &cpu->update_util);
11161087

11171088
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
11181089

@@ -1196,7 +1167,9 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
11961167

11971168
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
11981169

1199-
del_timer_sync(&all_cpu_data[cpu_num]->timer);
1170+
cpufreq_set_update_util_data(cpu_num, NULL);
1171+
synchronize_rcu();
1172+
12001173
if (hwp_active)
12011174
return;
12021175

@@ -1260,6 +1233,7 @@ static int intel_pstate_msrs_not_valid(void)
12601233
static void copy_pid_params(struct pstate_adjust_policy *policy)
12611234
{
12621235
pid_params.sample_rate_ms = policy->sample_rate_ms;
1236+
pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
12631237
pid_params.p_gain_pct = policy->p_gain_pct;
12641238
pid_params.i_gain_pct = policy->i_gain_pct;
12651239
pid_params.d_gain_pct = policy->d_gain_pct;
@@ -1451,7 +1425,8 @@ static int __init intel_pstate_init(void)
14511425
get_online_cpus();
14521426
for_each_online_cpu(cpu) {
14531427
if (all_cpu_data[cpu]) {
1454-
del_timer_sync(&all_cpu_data[cpu]->timer);
1428+
cpufreq_set_update_util_data(cpu, NULL);
1429+
synchronize_rcu();
14551430
kfree(all_cpu_data[cpu]);
14561431
}
14571432
}

0 commit comments

Comments
 (0)