Skip to content

Commit c5a2ee7

Browse files
committed
cpufreq: intel_pstate: Active mode P-state limits rework
The coordination of P-state limits used by intel_pstate in the active mode (i.e. by default) is problematic, because it synchronizes all of the limits (i.e. the global ones and the per-policy ones) so as to use one common pair of P-state limits (min and max) across all CPUs in the system. The drawbacks of that are as follows:

- If P-states are coordinated in hardware, it is not necessary to coordinate them in software on top of that, so in that case all of the above activity is in vain.
- If P-states are not coordinated in hardware, then the processor is actually capable of setting different P-states for different CPUs and coordinating them at the software level simply doesn't allow that capability to be utilized.
- The coordination works in such a way that setting a per-policy limit (e.g. scaling_max_freq) for one CPU causes the common effective limit to change (and it will affect all of the other CPUs too), but subsequent reads from the corresponding sysfs attributes for the other CPUs will return stale values (which is confusing).
- Reads from the global P-state limit attributes, min_perf_pct and max_perf_pct, return the effective common values and not the last values set through these attributes. However, the last values set through these attributes become hard limits that cannot be exceeded by writes to scaling_min_freq and scaling_max_freq, respectively, and they are not exposed, so essentially users have to remember what they are.

All of that is painful enough to warrant a change of the management of P-state limits in the active mode. To that end, redesign the active mode P-state limits management in intel_pstate in accordance with the following rules:

(1) All CPUs are affected by the global limits (that is, none of them can be requested to run faster than the global max and none of them can be requested to run slower than the global min).
(2) Each individual CPU is affected by its own per-policy limits (that is, it cannot be requested to run faster than its own per-policy max and it cannot be requested to run slower than its own per-policy min).
(3) The global and per-policy limits can be set independently.

Also, the global maximum and minimum P-state limits will always be expressed as percentages of the maximum supported turbo P-state.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent 5539534 commit c5a2ee7

File tree

1 file changed

+85
-100
lines changed

1 file changed

+85
-100
lines changed

drivers/cpufreq/intel_pstate.c

Lines changed: 85 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -187,44 +187,35 @@ struct _pid {
187187

188188
/**
189189
* struct perf_limits - Store user and policy limits
190-
* @no_turbo: User requested turbo state from intel_pstate sysfs
191-
* @turbo_disabled: Platform turbo status either from msr
192-
* MSR_IA32_MISC_ENABLE or when maximum available pstate
193-
* matches the maximum turbo pstate
194-
* @max_perf_pct: Effective maximum performance limit in percentage, this
195-
* is minimum of either limits enforced by cpufreq policy
196-
* or limits from user set limits via intel_pstate sysfs
197-
* @min_perf_pct: Effective minimum performance limit in percentage, this
198-
* is maximum of either limits enforced by cpufreq policy
199-
* or limits from user set limits via intel_pstate sysfs
200190
* @max_perf: This is a scaled value between 0 to 255 for max_perf_pct
201191
* This value is used to limit max pstate
202192
* @min_perf: This is a scaled value between 0 to 255 for min_perf_pct
203193
* This value is used to limit min pstate
204-
* @max_policy_pct: The maximum performance in percentage enforced by
205-
* cpufreq setpolicy interface
206-
* @max_sysfs_pct: The maximum performance in percentage enforced by
207-
* intel pstate sysfs interface, unused when per cpu
208-
* controls are enforced
209-
* @min_policy_pct: The minimum performance in percentage enforced by
210-
* cpufreq setpolicy interface
211-
* @min_sysfs_pct: The minimum performance in percentage enforced by
212-
* intel pstate sysfs interface, unused when per cpu
213-
* controls are enforced
214194
*
215-
* Storage for user and policy defined limits.
195+
* Storage for policy defined limits.
216196
*/
217197
struct perf_limits {
218-
int no_turbo;
219-
int turbo_disabled;
220-
int max_perf_pct;
221-
int min_perf_pct;
222198
int32_t max_perf;
223199
int32_t min_perf;
224-
int max_policy_pct;
225-
int max_sysfs_pct;
226-
int min_policy_pct;
227-
int min_sysfs_pct;
200+
};
201+
202+
/**
203+
* struct global_params - Global parameters, mostly tunable via sysfs.
204+
* @no_turbo: Whether or not to use turbo P-states.
205+
* @turbo_disabled: Whether or not turbo P-states are available at all,
206+
* based on the MSR_IA32_MISC_ENABLE value and whether or
207+
* not the maximum reported turbo P-state is different from
208+
* the maximum reported non-turbo one.
209+
* @min_perf_pct: Minimum capacity limit in percent of the maximum turbo
210+
* P-state capacity.
211+
* @max_perf_pct: Maximum capacity limit in percent of the maximum turbo
212+
* P-state capacity.
213+
*/
214+
struct global_params {
215+
bool no_turbo;
216+
bool turbo_disabled;
217+
int max_perf_pct;
218+
int min_perf_pct;
228219
};
229220

230221
/**
@@ -245,9 +236,7 @@ struct perf_limits {
245236
* @prev_cummulative_iowait: IO Wait time difference from last and
246237
* current sample
247238
* @sample: Storage for storing last Sample data
248-
* @perf_limits: Pointer to perf_limit unique to this CPU
249-
* Not all field in the structure are applicable
250-
* when per cpu controls are enforced
239+
* @perf_limits: Capacity limits unique to this CPU
251240
* @acpi_perf_data: Stores ACPI perf information read from _PSS
252241
* @valid_pss_table: Set to true for valid ACPI _PSS entries found
253242
* @epp_powersave: Last saved HWP energy performance preference
@@ -279,7 +268,7 @@ struct cpudata {
279268
u64 prev_tsc;
280269
u64 prev_cummulative_iowait;
281270
struct sample sample;
282-
struct perf_limits *perf_limits;
271+
struct perf_limits perf_limits;
283272
#ifdef CONFIG_ACPI
284273
struct acpi_processor_performance acpi_perf_data;
285274
bool valid_pss_table;
@@ -364,16 +353,7 @@ static bool driver_registered __read_mostly;
364353
static bool acpi_ppc;
365354
#endif
366355

367-
static struct perf_limits global;
368-
369-
static void intel_pstate_init_limits(struct perf_limits *limits)
370-
{
371-
memset(limits, 0, sizeof(*limits));
372-
limits->max_perf_pct = 100;
373-
limits->max_perf = int_ext_tofp(1);
374-
limits->max_policy_pct = 100;
375-
limits->max_sysfs_pct = 100;
376-
}
356+
static struct global_params global;
377357

378358
static DEFINE_MUTEX(intel_pstate_driver_lock);
379359
static DEFINE_MUTEX(intel_pstate_limits_lock);
@@ -621,6 +601,14 @@ static inline void update_turbo_state(void)
621601
cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
622602
}
623603

604+
static int min_perf_pct_min(void)
605+
{
606+
struct cpudata *cpu = all_cpu_data[0];
607+
608+
return DIV_ROUND_UP(cpu->pstate.min_pstate * 100,
609+
cpu->pstate.turbo_pstate);
610+
}
611+
624612
static s16 intel_pstate_get_epb(struct cpudata *cpu_data)
625613
{
626614
u64 epb;
@@ -841,16 +829,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
841829
static void intel_pstate_hwp_set(struct cpufreq_policy *policy)
842830
{
843831
int min, hw_min, max, hw_max, cpu;
844-
struct perf_limits *perf_limits = &global;
845832
u64 value, cap;
846833

847834
for_each_cpu(cpu, policy->cpus) {
848835
struct cpudata *cpu_data = all_cpu_data[cpu];
836+
struct perf_limits *perf_limits = &cpu_data->perf_limits;
849837
s16 epp;
850838

851-
if (per_cpu_limits)
852-
perf_limits = all_cpu_data[cpu]->perf_limits;
853-
854839
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
855840
hw_min = HWP_LOWEST_PERF(cap);
856841
if (global.no_turbo)
@@ -1163,6 +1148,15 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
11631148

11641149
global.no_turbo = clamp_t(int, input, 0, 1);
11651150

1151+
if (global.no_turbo) {
1152+
struct cpudata *cpu = all_cpu_data[0];
1153+
int pct = cpu->pstate.max_pstate * 100 / cpu->pstate.turbo_pstate;
1154+
1155+
/* Squash the global minimum into the permitted range. */
1156+
if (global.min_perf_pct > pct)
1157+
global.min_perf_pct = pct;
1158+
}
1159+
11661160
mutex_unlock(&intel_pstate_limits_lock);
11671161

11681162
intel_pstate_update_policies();
@@ -1191,11 +1185,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
11911185

11921186
mutex_lock(&intel_pstate_limits_lock);
11931187

1194-
global.max_sysfs_pct = clamp_t(int, input, 0 , 100);
1195-
global.max_perf_pct = min(global.max_policy_pct, global.max_sysfs_pct);
1196-
global.max_perf_pct = max(global.min_policy_pct, global.max_perf_pct);
1197-
global.max_perf_pct = max(global.min_perf_pct, global.max_perf_pct);
1198-
global.max_perf = percent_ext_fp(global.max_perf_pct);
1188+
global.max_perf_pct = clamp_t(int, input, global.min_perf_pct, 100);
11991189

12001190
mutex_unlock(&intel_pstate_limits_lock);
12011191

@@ -1225,11 +1215,8 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
12251215

12261216
mutex_lock(&intel_pstate_limits_lock);
12271217

1228-
global.min_sysfs_pct = clamp_t(int, input, 0 , 100);
1229-
global.min_perf_pct = max(global.min_policy_pct, global.min_sysfs_pct);
1230-
global.min_perf_pct = min(global.max_policy_pct, global.min_perf_pct);
1231-
global.min_perf_pct = min(global.max_perf_pct, global.min_perf_pct);
1232-
global.min_perf = percent_ext_fp(global.min_perf_pct);
1218+
global.min_perf_pct = clamp_t(int, input,
1219+
min_perf_pct_min(), global.max_perf_pct);
12331220

12341221
mutex_unlock(&intel_pstate_limits_lock);
12351222

@@ -1650,14 +1637,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
16501637
int max_perf = cpu->pstate.turbo_pstate;
16511638
int max_perf_adj;
16521639
int min_perf;
1653-
struct perf_limits *perf_limits = &global;
1640+
struct perf_limits *perf_limits = &cpu->perf_limits;
16541641

16551642
if (global.no_turbo || global.turbo_disabled)
16561643
max_perf = cpu->pstate.max_pstate;
16571644

1658-
if (per_cpu_limits)
1659-
perf_limits = cpu->perf_limits;
1660-
16611645
/*
16621646
* performance can be limited by user through sysfs, by cpufreq
16631647
* policy, or by cpu specific default values determined through
@@ -1968,18 +1952,11 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
19681952
cpu = all_cpu_data[cpunum];
19691953

19701954
if (!cpu) {
1971-
unsigned int size = sizeof(struct cpudata);
1972-
1973-
if (per_cpu_limits)
1974-
size += sizeof(struct perf_limits);
1975-
1976-
cpu = kzalloc(size, GFP_KERNEL);
1955+
cpu = kzalloc(sizeof(*cpu), GFP_KERNEL);
19771956
if (!cpu)
19781957
return -ENOMEM;
19791958

19801959
all_cpu_data[cpunum] = cpu;
1981-
if (per_cpu_limits)
1982-
cpu->perf_limits = (struct perf_limits *)(cpu + 1);
19831960

19841961
cpu->epp_default = -EINVAL;
19851962
cpu->epp_powersave = -EINVAL;
@@ -2045,8 +2022,9 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu)
20452022
}
20462023

20472024
static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
2048-
struct perf_limits *limits)
2025+
struct cpudata *cpu)
20492026
{
2027+
struct perf_limits *limits = &cpu->perf_limits;
20502028
int32_t max_policy_perf, min_policy_perf;
20512029

20522030
max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq);
@@ -2061,29 +2039,45 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy,
20612039
}
20622040

20632041
/* Normalize user input to [min_perf, max_perf] */
2064-
limits->min_perf = max(min_policy_perf,
2065-
percent_ext_fp(limits->min_sysfs_pct));
2066-
limits->min_perf = min(limits->min_perf, max_policy_perf);
2067-
limits->max_perf = min(max_policy_perf,
2068-
percent_ext_fp(limits->max_sysfs_pct));
2069-
limits->max_perf = max(min_policy_perf, limits->max_perf);
2042+
if (per_cpu_limits) {
2043+
limits->min_perf = min_policy_perf;
2044+
limits->max_perf = max_policy_perf;
2045+
} else {
2046+
int32_t global_min, global_max;
2047+
2048+
/* Global limits are in percent of the maximum turbo P-state. */
2049+
global_max = percent_ext_fp(global.max_perf_pct);
2050+
global_min = percent_ext_fp(global.min_perf_pct);
2051+
if (policy->cpuinfo.max_freq != cpu->pstate.turbo_freq) {
2052+
int32_t turbo_factor;
2053+
2054+
turbo_factor = div_ext_fp(cpu->pstate.turbo_pstate,
2055+
cpu->pstate.max_pstate);
2056+
global_min = mul_ext_fp(global_min, turbo_factor);
2057+
global_max = mul_ext_fp(global_max, turbo_factor);
2058+
}
2059+
global_min = clamp_t(int32_t, global_min, 0, global_max);
20702060

2071-
/* Make sure min_perf <= max_perf */
2072-
limits->min_perf = min(limits->min_perf, limits->max_perf);
2061+
limits->min_perf = max(min_policy_perf, global_min);
2062+
limits->min_perf = min(limits->min_perf, max_policy_perf);
2063+
limits->max_perf = min(max_policy_perf, global_max);
2064+
limits->max_perf = max(min_policy_perf, limits->max_perf);
2065+
2066+
/* Make sure min_perf <= max_perf */
2067+
limits->min_perf = min(limits->min_perf, limits->max_perf);
2068+
}
20732069

20742070
limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS);
20752071
limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS);
2076-
limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100);
2077-
limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100);
20782072

20792073
pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu,
2080-
limits->max_perf_pct, limits->min_perf_pct);
2074+
fp_ext_toint(limits->max_perf * 100),
2075+
fp_ext_toint(limits->min_perf * 100));
20812076
}
20822077

20832078
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
20842079
{
20852080
struct cpudata *cpu;
2086-
struct perf_limits *perf_limits = &global;
20872081

20882082
if (!policy->cpuinfo.max_freq)
20892083
return -ENODEV;
@@ -2101,12 +2095,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
21012095
policy->max = policy->cpuinfo.max_freq;
21022096
}
21032097

2104-
if (per_cpu_limits)
2105-
perf_limits = cpu->perf_limits;
2106-
21072098
mutex_lock(&intel_pstate_limits_lock);
21082099

2109-
intel_pstate_update_perf_limits(policy, perf_limits);
2100+
intel_pstate_update_perf_limits(policy, cpu);
21102101

21112102
if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
21122103
/*
@@ -2142,17 +2133,6 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
21422133
policy->policy != CPUFREQ_POLICY_PERFORMANCE)
21432134
return -EINVAL;
21442135

2145-
/* When per-CPU limits are used, sysfs limits are not used */
2146-
if (!per_cpu_limits) {
2147-
unsigned int max_freq, min_freq;
2148-
2149-
max_freq = policy->cpuinfo.max_freq *
2150-
global.max_sysfs_pct / 100;
2151-
min_freq = policy->cpuinfo.max_freq *
2152-
global.min_sysfs_pct / 100;
2153-
cpufreq_verify_within_limits(policy, min_freq, max_freq);
2154-
}
2155-
21562136
return 0;
21572137
}
21582138

@@ -2192,8 +2172,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
21922172

21932173
cpu = all_cpu_data[policy->cpu];
21942174

2195-
if (per_cpu_limits)
2196-
intel_pstate_init_limits(cpu->perf_limits);
2175+
cpu->perf_limits.max_perf = int_ext_tofp(1);
2176+
cpu->perf_limits.min_perf = 0;
21972177

21982178
policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
21992179
policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
@@ -2252,6 +2232,8 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
22522232

22532233
cpufreq_verify_within_cpu_limits(policy);
22542234

2235+
intel_pstate_update_perf_limits(policy, cpu);
2236+
22552237
return 0;
22562238
}
22572239

@@ -2354,14 +2336,17 @@ static int intel_pstate_register_driver(void)
23542336
{
23552337
int ret;
23562338

2357-
intel_pstate_init_limits(&global);
2339+
memset(&global, 0, sizeof(global));
2340+
global.max_perf_pct = 100;
23582341

23592342
ret = cpufreq_register_driver(intel_pstate_driver);
23602343
if (ret) {
23612344
intel_pstate_driver_cleanup();
23622345
return ret;
23632346
}
23642347

2348+
global.min_perf_pct = min_perf_pct_min();
2349+
23652350
mutex_lock(&intel_pstate_limits_lock);
23662351
driver_registered = true;
23672352
mutex_unlock(&intel_pstate_limits_lock);

0 commit comments

Comments
 (0)