Skip to content

Commit 2ec1c17

Browse files
committed
Merge branches 'pm-opp', 'pm-cpufreq' and 'pm-tools'
* pm-opp:
  PM / OPP: do error handling at the bottom of dev_pm_opp_add_dynamic()
  PM / OPP: handle allocation of device_opp in a separate routine
  PM / OPP: reuse find_device_opp() instead of duplicating code
  PM / OPP: Staticize __dev_pm_opp_remove()
  PM / OPP: replace kfree with kfree_rcu while freeing 'struct device_opp'

* pm-cpufreq:
  MAINTAINERS: add entry for intel_pstate
  intel_pstate: Add a few comments
  intel_pstate: add kernel parameter to force loading

* pm-tools:
  Revert "tools: cpupower: fix return checks for sysfs_get_idlestate_count()"
4 parents: 035f10e + 6ce4184 + 7c1ac18 + 2a813f1 — commit 2ec1c17

File tree

5 files changed

+97
-38
lines changed

5 files changed

+97
-38
lines changed

Documentation/kernel-parameters.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,6 +1446,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
14461446
disable
14471447
Do not enable intel_pstate as the default
14481448
scaling driver for the supported processors
1449+
force
1450+
Enable intel_pstate on systems that prohibit it by default
1451+
in favor of acpi-cpufreq. Forcing the intel_pstate driver
1452+
instead of acpi-cpufreq may disable platform features, such
1453+
as thermal controls and power capping, that rely on ACPI
1454+
P-States information being indicated to OSPM and therefore
1455+
should be used with caution. This option does not work with
1456+
processors that aren't supported by the intel_pstate driver
1457+
or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
14491458
no_hwp
14501459
Do not enable hardware P state control (HWP)
14511460
if available.

MAINTAINERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4869,6 +4869,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git
48694869
S: Supported
48704870
F: drivers/idle/intel_idle.c
48714871

4872+
INTEL PSTATE DRIVER
4873+
M: Kristen Carlson Accardi <kristen@linux.intel.com>
4874+
L: linux-pm@vger.kernel.org
4875+
S: Supported
4876+
F: drivers/cpufreq/intel_pstate.c
4877+
48724878
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
48734879
M: Maik Broemme <mbroemme@plusserver.de>
48744880
L: linux-fbdev@vger.kernel.org

drivers/base/power/opp.c

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,11 @@ struct dev_pm_opp {
8484
*
8585
* This is an internal data structure maintaining the link to opps attached to
8686
* a device. This structure is not meant to be shared to users as it is
87-
* meant for book keeping and private to OPP library
87+
* meant for book keeping and private to OPP library.
88+
*
89+
* Because the opp structures can be used from both rcu and srcu readers, we
90+
* need to wait for the grace period of both of them before freeing any
91+
* resources. And so we have used kfree_rcu() from within call_srcu() handlers.
8892
*/
8993
struct device_opp {
9094
struct list_head node;
@@ -382,12 +386,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
382386
}
383387
EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
384388

389+
static struct device_opp *add_device_opp(struct device *dev)
390+
{
391+
struct device_opp *dev_opp;
392+
393+
/*
394+
* Allocate a new device OPP table. In the infrequent case where a new
395+
* device is needed to be added, we pay this penalty.
396+
*/
397+
dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
398+
if (!dev_opp)
399+
return NULL;
400+
401+
dev_opp->dev = dev;
402+
srcu_init_notifier_head(&dev_opp->srcu_head);
403+
INIT_LIST_HEAD(&dev_opp->opp_list);
404+
405+
/* Secure the device list modification */
406+
list_add_rcu(&dev_opp->node, &dev_opp_list);
407+
return dev_opp;
408+
}
409+
385410
static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
386411
unsigned long u_volt, bool dynamic)
387412
{
388413
struct device_opp *dev_opp = NULL;
389414
struct dev_pm_opp *opp, *new_opp;
390415
struct list_head *head;
416+
int ret;
391417

392418
/* allocate new OPP node */
393419
new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
@@ -408,27 +434,12 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
408434
/* Check for existing list for 'dev' */
409435
dev_opp = find_device_opp(dev);
410436
if (IS_ERR(dev_opp)) {
411-
/*
412-
* Allocate a new device OPP table. In the infrequent case
413-
* where a new device is needed to be added, we pay this
414-
* penalty.
415-
*/
416-
dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
437+
dev_opp = add_device_opp(dev);
417438
if (!dev_opp) {
418-
mutex_unlock(&dev_opp_list_lock);
419-
kfree(new_opp);
420-
dev_warn(dev,
421-
"%s: Unable to create device OPP structure\n",
422-
__func__);
423-
return -ENOMEM;
439+
ret = -ENOMEM;
440+
goto free_opp;
424441
}
425442

426-
dev_opp->dev = dev;
427-
srcu_init_notifier_head(&dev_opp->srcu_head);
428-
INIT_LIST_HEAD(&dev_opp->opp_list);
429-
430-
/* Secure the device list modification */
431-
list_add_rcu(&dev_opp->node, &dev_opp_list);
432443
head = &dev_opp->opp_list;
433444
goto list_add;
434445
}
@@ -447,15 +458,13 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
447458

448459
/* Duplicate OPPs ? */
449460
if (new_opp->rate == opp->rate) {
450-
int ret = opp->available && new_opp->u_volt == opp->u_volt ?
461+
ret = opp->available && new_opp->u_volt == opp->u_volt ?
451462
0 : -EEXIST;
452463

453464
dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
454465
__func__, opp->rate, opp->u_volt, opp->available,
455466
new_opp->rate, new_opp->u_volt, new_opp->available);
456-
mutex_unlock(&dev_opp_list_lock);
457-
kfree(new_opp);
458-
return ret;
467+
goto free_opp;
459468
}
460469

461470
list_add:
@@ -469,6 +478,11 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
469478
*/
470479
srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
471480
return 0;
481+
482+
free_opp:
483+
mutex_unlock(&dev_opp_list_lock);
484+
kfree(new_opp);
485+
return ret;
472486
}
473487

474488
/**
@@ -511,10 +525,11 @@ static void kfree_device_rcu(struct rcu_head *head)
511525
{
512526
struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
513527

514-
kfree(device_opp);
528+
kfree_rcu(device_opp, rcu_head);
515529
}
516530

517-
void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
531+
static void __dev_pm_opp_remove(struct device_opp *dev_opp,
532+
struct dev_pm_opp *opp)
518533
{
519534
/*
520535
* Notify the changes in the availability of the operable
@@ -592,7 +607,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
592607
static int opp_set_availability(struct device *dev, unsigned long freq,
593608
bool availability_req)
594609
{
595-
struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
610+
struct device_opp *dev_opp;
596611
struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
597612
int r = 0;
598613

@@ -606,12 +621,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
606621
mutex_lock(&dev_opp_list_lock);
607622

608623
/* Find the device_opp */
609-
list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
610-
if (dev == tmp_dev_opp->dev) {
611-
dev_opp = tmp_dev_opp;
612-
break;
613-
}
614-
}
624+
dev_opp = find_device_opp(dev);
615625
if (IS_ERR(dev_opp)) {
616626
r = PTR_ERR(dev_opp);
617627
dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);

drivers/cpufreq/intel_pstate.c

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,14 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
199199

200200
pid->integral += fp_error;
201201

202-
/* limit the integral term */
202+
/*
203+
* We limit the integral here so that it will never
204+
* get higher than 30. This prevents it from becoming
205+
* too large an input over long periods of time and allows
206+
* it to get factored out sooner.
207+
*
208+
* The value of 30 was chosen through experimentation.
209+
*/
203210
integral_limit = int_tofp(30);
204211
if (pid->integral > integral_limit)
205212
pid->integral = integral_limit;
@@ -616,6 +623,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
616623
if (limits.no_turbo || limits.turbo_disabled)
617624
max_perf = cpu->pstate.max_pstate;
618625

626+
/*
627+
* performance can be limited by user through sysfs, by cpufreq
628+
* policy, or by cpu specific default values determined through
629+
* experimentation.
630+
*/
619631
max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
620632
*max = clamp_t(int, max_perf_adj,
621633
cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
@@ -717,11 +729,29 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
717729
u32 duration_us;
718730
u32 sample_time;
719731

732+
/*
733+
* core_busy is the ratio of actual performance to max
734+
* max_pstate is the max non turbo pstate available
735+
* current_pstate was the pstate that was requested during
736+
* the last sample period.
737+
*
738+
* We normalize core_busy, which was our actual percent
739+
* performance to what we requested during the last sample
740+
* period. The result will be a percentage of busy at a
741+
* specified pstate.
742+
*/
720743
core_busy = cpu->sample.core_pct_busy;
721744
max_pstate = int_tofp(cpu->pstate.max_pstate);
722745
current_pstate = int_tofp(cpu->pstate.current_pstate);
723746
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
724747

748+
/*
749+
* Since we have a deferred timer, it will not fire unless
750+
* we are in C0. So, determine if the actual elapsed time
751+
* is significantly greater (3x) than our sample interval. If it
752+
* is, then we were idle for a long enough period of time
753+
* to adjust our busyness.
754+
*/
725755
sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
726756
duration_us = (u32) ktime_us_delta(cpu->sample.time,
727757
cpu->last_sample_time);
@@ -948,6 +978,7 @@ static struct cpufreq_driver intel_pstate_driver = {
948978

949979
static int __initdata no_load;
950980
static int __initdata no_hwp;
981+
static unsigned int force_load;
951982

952983
static int intel_pstate_msrs_not_valid(void)
953984
{
@@ -1094,7 +1125,8 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void)
10941125
case PSS:
10951126
return intel_pstate_no_acpi_pss();
10961127
case PPC:
1097-
return intel_pstate_has_acpi_ppc();
1128+
return intel_pstate_has_acpi_ppc() &&
1129+
(!force_load);
10981130
}
10991131
}
11001132

@@ -1175,6 +1207,8 @@ static int __init intel_pstate_setup(char *str)
11751207
no_load = 1;
11761208
if (!strcmp(str, "no_hwp"))
11771209
no_hwp = 1;
1210+
if (!strcmp(str, "force"))
1211+
force_load = 1;
11781212
return 0;
11791213
}
11801214
early_param("intel_pstate", intel_pstate_setup);

tools/power/cpupower/utils/cpuidle-info.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222

2323
static void cpuidle_cpu_output(unsigned int cpu, int verbose)
2424
{
25-
int idlestates, idlestate;
25+
unsigned int idlestates, idlestate;
2626
char *tmp;
2727

2828
printf(_ ("Analyzing CPU %d:\n"), cpu);
2929

3030
idlestates = sysfs_get_idlestate_count(cpu);
31-
if (idlestates < 1) {
31+
if (idlestates == 0) {
3232
printf(_("CPU %u: No idle states\n"), cpu);
3333
return;
3434
}
@@ -100,10 +100,10 @@ static void cpuidle_general_output(void)
100100
static void proc_cpuidle_cpu_output(unsigned int cpu)
101101
{
102102
long max_allowed_cstate = 2000000000;
103-
int cstate, cstates;
103+
unsigned int cstate, cstates;
104104

105105
cstates = sysfs_get_idlestate_count(cpu);
106-
if (cstates < 1) {
106+
if (cstates == 0) {
107107
printf(_("CPU %u: No C-states info\n"), cpu);
108108
return;
109109
}

0 commit comments

Comments (0)