Skip to content

Commit aae4518

Browse files
committed
PM / sleep: Mechanism to avoid resuming runtime-suspended devices unnecessarily
Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to resume all runtime-suspended devices during system suspend, mostly because those devices may need to be reprogrammed due to different wakeup settings for system sleep and for runtime PM. For some devices, though, it's OK to remain in runtime suspend throughout a complete system suspend/resume cycle (if the device was in runtime suspend at the start of the cycle). We would like to do this whenever possible, to avoid the overhead of extra power-up and power-down events. However, problems may arise because the device's descendants may require it to be at full power at various points during the cycle. Therefore the most straightforward way to do this safely is if the device and all its descendants can remain runtime suspended until the complete stage of system resume. To this end, introduce a new device PM flag, power.direct_complete and modify the PM core to use that flag as follows. If the ->prepare() callback of a device returns a positive number, the PM core will regard that as an indication that it may leave the device runtime-suspended. It will then check if the system power transition in progress is a suspend (and not hibernation in particular) and if the device is, indeed, runtime-suspended. In that case, the PM core will set the device's power.direct_complete flag. Otherwise it will clear power.direct_complete for the device and it will also later clear it for the device's parent (if there's one). Next, the PM core will not invoke the ->suspend(), ->suspend_late(), ->suspend_noirq(), ->resume_noirq(), ->resume_early(), or ->resume() callbacks for any device having power.direct_complete set. It will invoke their ->complete() callbacks, however, and those callbacks are then responsible for resuming the devices as appropriate, if necessary. For example, in some cases they may need to queue up runtime resume requests for the devices using pm_request_resume(). 
Changelog partly based on Alan Stern's description of the idea (http://marc.info/?l=linux-pm&m=139940466625569&w=2). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Alan Stern <stern@rowland.harvard.edu>
1 parent f6514be commit aae4518

File tree

3 files changed

+85
-23
lines changed

3 files changed

+85
-23
lines changed

drivers/base/power/main.c

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
479479
TRACE_DEVICE(dev);
480480
TRACE_RESUME(0);
481481

482-
if (dev->power.syscore)
482+
if (dev->power.syscore || dev->power.direct_complete)
483483
goto Out;
484484

485485
if (!dev->power.is_noirq_suspended)
@@ -605,7 +605,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
605605
TRACE_DEVICE(dev);
606606
TRACE_RESUME(0);
607607

608-
if (dev->power.syscore)
608+
if (dev->power.syscore || dev->power.direct_complete)
609609
goto Out;
610610

611611
if (!dev->power.is_late_suspended)
@@ -735,6 +735,12 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
735735
if (dev->power.syscore)
736736
goto Complete;
737737

738+
if (dev->power.direct_complete) {
739+
/* Match the pm_runtime_disable() in __device_suspend(). */
740+
pm_runtime_enable(dev);
741+
goto Complete;
742+
}
743+
738744
dpm_wait(dev->parent, async);
739745
dpm_watchdog_set(&wd, dev);
740746
device_lock(dev);
@@ -1007,7 +1013,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
10071013
goto Complete;
10081014
}
10091015

1010-
if (dev->power.syscore)
1016+
if (dev->power.syscore || dev->power.direct_complete)
10111017
goto Complete;
10121018

10131019
dpm_wait_for_children(dev, async);
@@ -1146,7 +1152,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
11461152
goto Complete;
11471153
}
11481154

1149-
if (dev->power.syscore)
1155+
if (dev->power.syscore || dev->power.direct_complete)
11501156
goto Complete;
11511157

11521158
dpm_wait_for_children(dev, async);
@@ -1332,6 +1338,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
13321338
if (dev->power.syscore)
13331339
goto Complete;
13341340

1341+
if (dev->power.direct_complete) {
1342+
if (pm_runtime_status_suspended(dev)) {
1343+
pm_runtime_disable(dev);
1344+
if (pm_runtime_suspended_if_enabled(dev))
1345+
goto Complete;
1346+
1347+
pm_runtime_enable(dev);
1348+
}
1349+
dev->power.direct_complete = false;
1350+
}
1351+
13351352
dpm_watchdog_set(&wd, dev);
13361353
device_lock(dev);
13371354

@@ -1382,10 +1399,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
13821399

13831400
End:
13841401
if (!error) {
1402+
struct device *parent = dev->parent;
1403+
13851404
dev->power.is_suspended = true;
1386-
if (dev->power.wakeup_path
1387-
&& dev->parent && !dev->parent->power.ignore_children)
1388-
dev->parent->power.wakeup_path = true;
1405+
if (parent) {
1406+
spin_lock_irq(&parent->power.lock);
1407+
1408+
dev->parent->power.direct_complete = false;
1409+
if (dev->power.wakeup_path
1410+
&& !dev->parent->power.ignore_children)
1411+
dev->parent->power.wakeup_path = true;
1412+
1413+
spin_unlock_irq(&parent->power.lock);
1414+
}
13891415
}
13901416

13911417
device_unlock(dev);
@@ -1487,7 +1513,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
14871513
{
14881514
int (*callback)(struct device *) = NULL;
14891515
char *info = NULL;
1490-
int error = 0;
1516+
int ret = 0;
14911517

14921518
if (dev->power.syscore)
14931519
return 0;
@@ -1523,17 +1549,27 @@ static int device_prepare(struct device *dev, pm_message_t state)
15231549
callback = dev->driver->pm->prepare;
15241550
}
15251551

1526-
if (callback) {
1527-
error = callback(dev);
1528-
suspend_report_result(callback, error);
1529-
}
1552+
if (callback)
1553+
ret = callback(dev);
15301554

15311555
device_unlock(dev);
15321556

1533-
if (error)
1557+
if (ret < 0) {
1558+
suspend_report_result(callback, ret);
15341559
pm_runtime_put(dev);
1535-
1536-
return error;
1560+
return ret;
1561+
}
1562+
/*
1563+
* A positive return value from ->prepare() means "this device appears
1564+
* to be runtime-suspended and its state is fine, so if it really is
1565+
* runtime-suspended, you can leave it in that state provided that you
1566+
* will do the same thing with all of its descendants". This only
1567+
* applies to suspend transitions, however.
1568+
*/
1569+
spin_lock_irq(&dev->power.lock);
1570+
dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
1571+
spin_unlock_irq(&dev->power.lock);
1572+
return 0;
15371573
}
15381574

15391575
/**

include/linux/pm.h

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,23 @@ typedef struct pm_message {
9393
* been registered) to recover from the race condition.
9494
* This method is executed for all kinds of suspend transitions and is
9595
* followed by one of the suspend callbacks: @suspend(), @freeze(), or
96-
* @poweroff(). The PM core executes subsystem-level @prepare() for all
97-
* devices before starting to invoke suspend callbacks for any of them, so
98-
* generally devices may be assumed to be functional or to respond to
99-
* runtime resume requests while @prepare() is being executed. However,
100-
* device drivers may NOT assume anything about the availability of user
101-
* space at that time and it is NOT valid to request firmware from within
102-
* @prepare() (it's too late to do that). It also is NOT valid to allocate
96+
* @poweroff(). If the transition is a suspend to memory or standby (that
97+
* is, not related to hibernation), the return value of @prepare() may be
98+
* used to indicate to the PM core to leave the device in runtime suspend
99+
* if applicable. Namely, if @prepare() returns a positive number, the PM
100+
* core will understand that as a declaration that the device appears to be
101+
* runtime-suspended and it may be left in that state during the entire
102+
* transition and during the subsequent resume if all of its descendants
103+
* are left in runtime suspend too. If that happens, @complete() will be
104+
* executed directly after @prepare() and it must ensure the proper
105+
* functioning of the device after the system resume.
106+
* The PM core executes subsystem-level @prepare() for all devices before
107+
* starting to invoke suspend callbacks for any of them, so generally
108+
* devices may be assumed to be functional or to respond to runtime resume
109+
* requests while @prepare() is being executed. However, device drivers
110+
* may NOT assume anything about the availability of user space at that
111+
* time and it is NOT valid to request firmware from within @prepare()
112+
* (it's too late to do that). It also is NOT valid to allocate
103113
* substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
104114
* [To work around these limitations, drivers may register suspend and
105115
* hibernation notifiers to be executed before the freezing of tasks.]
@@ -112,7 +122,16 @@ typedef struct pm_message {
112122
* of the other devices that the PM core has unsuccessfully attempted to
113123
* suspend earlier).
114124
* The PM core executes subsystem-level @complete() after it has executed
115-
* the appropriate resume callbacks for all devices.
125+
* the appropriate resume callbacks for all devices. If the corresponding
126+
* @prepare() at the beginning of the suspend transition returned a
127+
* positive number and the device was left in runtime suspend (without
128+
* executing any suspend and resume callbacks for it), @complete() will be
129+
* the only callback executed for the device during resume. In that case,
130+
* @complete() must be prepared to do whatever is necessary to ensure the
131+
* proper functioning of the device after the system resume. To this end,
132+
* @complete() can check the power.direct_complete flag of the device to
133+
* learn whether (unset) or not (set) the previous suspend and resume
134+
* callbacks have been executed for it.
116135
*
117136
* @suspend: Executed before putting the system into a sleep state in which the
118137
* contents of main memory are preserved. The exact action to perform
@@ -546,6 +565,7 @@ struct dev_pm_info {
546565
bool is_late_suspended:1;
547566
bool ignore_children:1;
548567
bool early_init:1; /* Owned by the PM core */
568+
bool direct_complete:1; /* Owned by the PM core */
549569
spinlock_t lock;
550570
#ifdef CONFIG_PM_SLEEP
551571
struct list_head entry;

include/linux/pm_runtime.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ static inline bool pm_runtime_status_suspended(struct device *dev)
101101
return dev->power.runtime_status == RPM_SUSPENDED;
102102
}
103103

104+
static inline bool pm_runtime_suspended_if_enabled(struct device *dev)
105+
{
106+
return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1;
107+
}
108+
104109
static inline bool pm_runtime_enabled(struct device *dev)
105110
{
106111
return !dev->power.disable_depth;
@@ -150,6 +155,7 @@ static inline void device_set_run_wake(struct device *dev, bool enable) {}
150155
static inline bool pm_runtime_suspended(struct device *dev) { return false; }
151156
static inline bool pm_runtime_active(struct device *dev) { return true; }
152157
static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
158+
static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; }
153159
static inline bool pm_runtime_enabled(struct device *dev) { return false; }
154160

155161
static inline void pm_runtime_no_callbacks(struct device *dev) {}

0 commit comments

Comments
 (0)