Skip to content

Commit 001c76f

Browse files
committed
cpufreq: intel_pstate: Generic governors support
There may be reasons to use generic cpufreq governors (eg. schedutil) on Intel platforms instead of the intel_pstate driver's internal governor. However, that currently can only be done by disabling intel_pstate altogether and using the acpi-cpufreq driver instead of it, which is subject to limitations. First of all, acpi-cpufreq only works on systems where the _PSS object is present in the ACPI tables for all logical CPUs. Second, on those systems acpi-cpufreq will only use frequencies listed by _PSS which may be suboptimal. In particular, by convention, the whole turbo range is represented in _PSS as a single P-state and the frequency assigned to it is greater by 1 MHz than the greatest non-turbo frequency listed by _PSS. That may confuse governors to use turbo frequencies less frequently which may lead to suboptimal performance. For this reason, make it possible to use the intel_pstate driver with generic cpufreq governors as a "normal" cpufreq driver. That mode is enforced by adding intel_pstate=passive to the kernel command line and cannot be disabled at run time. In that mode, intel_pstate provides a cpufreq driver interface including the ->target() and ->fast_switch() callbacks and is listed in scaling_driver as "intel_cpufreq". Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Doug Smythies <dsmythies@telus.net>
1 parent d0ea59e commit 001c76f

File tree

2 files changed

+176
-24
lines changed

2 files changed

+176
-24
lines changed

Documentation/kernel-parameters.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
17601760
disable
17611761
Do not enable intel_pstate as the default
17621762
scaling driver for the supported processors
1763+
passive
1764+
Use intel_pstate as a scaling driver, but configure it
1765+
to work with generic cpufreq governors (instead of
1766+
enabling its internal governor). This mode cannot be
1767+
used along with the hardware-managed P-states (HWP)
1768+
feature.
17631769
force
17641770
Enable intel_pstate on systems that prohibit it by default
17651771
in favor of acpi-cpufreq. Forcing the intel_pstate driver

drivers/cpufreq/intel_pstate.c

Lines changed: 170 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#include <asm/cpufeature.h>
3838
#include <asm/intel-family.h>
3939

40+
#define INTEL_CPUFREQ_TRANSITION_LATENCY 20000
41+
4042
#define ATOM_RATIOS 0x66a
4143
#define ATOM_VIDS 0x66b
4244
#define ATOM_TURBO_RATIOS 0x66c
@@ -122,6 +124,8 @@ struct sample {
122124
* @scaling: Scaling factor to convert frequency to cpufreq
123125
* frequency units
124126
* @turbo_pstate: Max Turbo P state possible for this platform
127+
* @max_freq: @max_pstate frequency in cpufreq units
128+
* @turbo_freq: @turbo_pstate frequency in cpufreq units
125129
*
126130
* Stores the per cpu model P state limits and current P state.
127131
*/
@@ -132,6 +136,8 @@ struct pstate_data {
132136
int max_pstate_physical;
133137
int scaling;
134138
int turbo_pstate;
139+
unsigned int max_freq;
140+
unsigned int turbo_freq;
135141
};
136142

137143
/**
@@ -470,7 +476,7 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
470476
{
471477
}
472478

473-
static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
479+
/*
 * !CONFIG_ACPI stub: there are no ACPI perf limits to tear down.
 * Must be void — the previous "static inline int" variant had an empty
 * body with no return statement, which is invalid for a non-void
 * function (the caller ignores the result anyway).
 */
static inline void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
{
}
476482
#endif
@@ -1225,6 +1231,8 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
12251231
cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
12261232
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
12271233
cpu->pstate.scaling = pstate_funcs.get_scaling();
1234+
cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
1235+
cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
12281236

12291237
if (pstate_funcs.get_vid)
12301238
pstate_funcs.get_vid(cpu);
@@ -1363,15 +1371,19 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
13631371
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, perf_scaled);
13641372
}
13651373

1366-
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
1374+
static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate)
13671375
{
13681376
int max_perf, min_perf;
13691377

1370-
update_turbo_state();
1371-
13721378
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
13731379
pstate = clamp_t(int, pstate, min_perf, max_perf);
13741380
trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
1381+
return pstate;
1382+
}
1383+
1384+
static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
1385+
{
1386+
pstate = intel_pstate_prepare_request(cpu, pstate);
13751387
if (pstate == cpu->pstate.current_pstate)
13761388
return;
13771389

@@ -1389,6 +1401,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
13891401
target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
13901402
cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
13911403

1404+
update_turbo_state();
1405+
13921406
intel_pstate_update_pstate(cpu, target_pstate);
13931407

13941408
sample = &cpu->sample;
@@ -1670,22 +1684,30 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
16701684
return 0;
16711685
}
16721686

1687+
static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
1688+
{
1689+
intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
1690+
}
1691+
16731692
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
16741693
{
1675-
int cpu_num = policy->cpu;
1676-
struct cpudata *cpu = all_cpu_data[cpu_num];
1694+
pr_debug("CPU %d exiting\n", policy->cpu);
16771695

1678-
pr_debug("CPU %d exiting\n", cpu_num);
1696+
intel_pstate_clear_update_util_hook(policy->cpu);
1697+
if (!hwp_active)
1698+
intel_cpufreq_stop_cpu(policy);
1699+
}
16791700

1680-
intel_pstate_clear_update_util_hook(cpu_num);
1701+
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
1702+
{
1703+
intel_pstate_exit_perf_limits(policy);
16811704

1682-
if (hwp_active)
1683-
return;
1705+
policy->fast_switch_possible = false;
16841706

1685-
intel_pstate_set_min_pstate(cpu);
1707+
return 0;
16861708
}
16871709

1688-
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
1710+
static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
16891711
{
16901712
struct cpudata *cpu;
16911713
int rc;
@@ -1696,11 +1718,6 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
16961718

16971719
cpu = all_cpu_data[policy->cpu];
16981720

1699-
if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1700-
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1701-
else
1702-
policy->policy = CPUFREQ_POLICY_POWERSAVE;
1703-
17041721
/*
17051722
* We need sane value in the cpu->perf_limits, so inherit from global
17061723
* perf_limits limits, which are seeded with values based on the
@@ -1720,20 +1737,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
17201737
policy->cpuinfo.max_freq *= cpu->pstate.scaling;
17211738

17221739
intel_pstate_init_acpi_perf_limits(policy);
1723-
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
17241740
cpumask_set_cpu(policy->cpu, policy->cpus);
17251741

1742+
policy->fast_switch_possible = true;
1743+
17261744
return 0;
17271745
}
17281746

1729-
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
1747+
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
17301748
{
1731-
intel_pstate_exit_perf_limits(policy);
1749+
int ret = __intel_pstate_cpu_init(policy);
1750+
1751+
if (ret)
1752+
return ret;
1753+
1754+
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
1755+
if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
1756+
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
1757+
else
1758+
policy->policy = CPUFREQ_POLICY_POWERSAVE;
17321759

17331760
return 0;
17341761
}
17351762

1736-
static struct cpufreq_driver intel_pstate_driver = {
1763+
static struct cpufreq_driver intel_pstate = {
17371764
.flags = CPUFREQ_CONST_LOOPS,
17381765
.verify = intel_pstate_verify_policy,
17391766
.setpolicy = intel_pstate_set_policy,
@@ -1745,6 +1772,118 @@ static struct cpufreq_driver intel_pstate_driver = {
17451772
.name = "intel_pstate",
17461773
};
17471774

1775+
static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy)
1776+
{
1777+
struct cpudata *cpu = all_cpu_data[policy->cpu];
1778+
struct perf_limits *perf_limits = limits;
1779+
1780+
update_turbo_state();
1781+
policy->cpuinfo.max_freq = limits->turbo_disabled ?
1782+
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
1783+
1784+
cpufreq_verify_within_cpu_limits(policy);
1785+
1786+
if (per_cpu_limits)
1787+
perf_limits = cpu->perf_limits;
1788+
1789+
intel_pstate_update_perf_limits(policy, perf_limits);
1790+
1791+
return 0;
1792+
}
1793+
1794+
static unsigned int intel_cpufreq_turbo_update(struct cpudata *cpu,
1795+
struct cpufreq_policy *policy,
1796+
unsigned int target_freq)
1797+
{
1798+
unsigned int max_freq;
1799+
1800+
update_turbo_state();
1801+
1802+
max_freq = limits->no_turbo || limits->turbo_disabled ?
1803+
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
1804+
policy->cpuinfo.max_freq = max_freq;
1805+
if (policy->max > max_freq)
1806+
policy->max = max_freq;
1807+
1808+
if (target_freq > max_freq)
1809+
target_freq = max_freq;
1810+
1811+
return target_freq;
1812+
}
1813+
1814+
static int intel_cpufreq_target(struct cpufreq_policy *policy,
1815+
unsigned int target_freq,
1816+
unsigned int relation)
1817+
{
1818+
struct cpudata *cpu = all_cpu_data[policy->cpu];
1819+
struct cpufreq_freqs freqs;
1820+
int target_pstate;
1821+
1822+
freqs.old = policy->cur;
1823+
freqs.new = intel_cpufreq_turbo_update(cpu, policy, target_freq);
1824+
1825+
cpufreq_freq_transition_begin(policy, &freqs);
1826+
switch (relation) {
1827+
case CPUFREQ_RELATION_L:
1828+
target_pstate = DIV_ROUND_UP(freqs.new, cpu->pstate.scaling);
1829+
break;
1830+
case CPUFREQ_RELATION_H:
1831+
target_pstate = freqs.new / cpu->pstate.scaling;
1832+
break;
1833+
default:
1834+
target_pstate = DIV_ROUND_CLOSEST(freqs.new, cpu->pstate.scaling);
1835+
break;
1836+
}
1837+
target_pstate = intel_pstate_prepare_request(cpu, target_pstate);
1838+
if (target_pstate != cpu->pstate.current_pstate) {
1839+
cpu->pstate.current_pstate = target_pstate;
1840+
wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL,
1841+
pstate_funcs.get_val(cpu, target_pstate));
1842+
}
1843+
cpufreq_freq_transition_end(policy, &freqs, false);
1844+
1845+
return 0;
1846+
}
1847+
1848+
static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
1849+
unsigned int target_freq)
1850+
{
1851+
struct cpudata *cpu = all_cpu_data[policy->cpu];
1852+
int target_pstate;
1853+
1854+
target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq);
1855+
target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling);
1856+
intel_pstate_update_pstate(cpu, target_pstate);
1857+
return target_freq;
1858+
}
1859+
1860+
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
1861+
{
1862+
int ret = __intel_pstate_cpu_init(policy);
1863+
1864+
if (ret)
1865+
return ret;
1866+
1867+
policy->cpuinfo.transition_latency = INTEL_CPUFREQ_TRANSITION_LATENCY;
1868+
/* This reflects the intel_pstate_get_cpu_pstates() setting. */
1869+
policy->cur = policy->cpuinfo.min_freq;
1870+
1871+
return 0;
1872+
}
1873+
1874+
static struct cpufreq_driver intel_cpufreq = {
1875+
.flags = CPUFREQ_CONST_LOOPS,
1876+
.verify = intel_cpufreq_verify_policy,
1877+
.target = intel_cpufreq_target,
1878+
.fast_switch = intel_cpufreq_fast_switch,
1879+
.init = intel_cpufreq_cpu_init,
1880+
.exit = intel_pstate_cpu_exit,
1881+
.stop_cpu = intel_cpufreq_stop_cpu,
1882+
.name = "intel_cpufreq",
1883+
};
1884+
1885+
static struct cpufreq_driver *intel_pstate_driver = &intel_pstate;
1886+
17481887
static int no_load __initdata;
17491888
static int no_hwp __initdata;
17501889
static int hwp_only __initdata;
@@ -1976,7 +2115,7 @@ static int __init intel_pstate_init(void)
19762115

19772116
intel_pstate_request_control_from_smm();
19782117

1979-
rc = cpufreq_register_driver(&intel_pstate_driver);
2118+
rc = cpufreq_register_driver(intel_pstate_driver);
19802119
if (rc)
19812120
goto out;
19822121

@@ -1991,7 +2130,9 @@ static int __init intel_pstate_init(void)
19912130
get_online_cpus();
19922131
for_each_online_cpu(cpu) {
19932132
if (all_cpu_data[cpu]) {
1994-
intel_pstate_clear_update_util_hook(cpu);
2133+
if (intel_pstate_driver == &intel_pstate)
2134+
intel_pstate_clear_update_util_hook(cpu);
2135+
19952136
kfree(all_cpu_data[cpu]);
19962137
}
19972138
}
@@ -2007,8 +2148,13 @@ static int __init intel_pstate_setup(char *str)
20072148
if (!str)
20082149
return -EINVAL;
20092150

2010-
if (!strcmp(str, "disable"))
2151+
if (!strcmp(str, "disable")) {
20112152
no_load = 1;
2153+
} else if (!strcmp(str, "passive")) {
2154+
pr_info("Passive mode enabled\n");
2155+
intel_pstate_driver = &intel_cpufreq;
2156+
no_hwp = 1;
2157+
}
20122158
if (!strcmp(str, "no_hwp")) {
20132159
pr_info("HWP disabled\n");
20142160
no_hwp = 1;

0 commit comments

Comments
 (0)