diff options
author | Rafael J. Wysocki <[email protected]> | 2023-12-18 20:25:02 +0100 |
---|---|---|
committer | Rafael J. Wysocki <[email protected]> | 2023-12-28 14:20:15 +0100 |
commit | 4e814173a8c4f432fd068b1c796f0416328c9d99 (patch) | |
tree | 1a6aa8ac86bee07f39bb3008ffef711878975211 | |
parent | 5f70413a85056db04050604a76b52e3f39a37f21 (diff) |
thermal: core: Fix thermal zone suspend-resume synchronization
There are 3 synchronization issues with thermal zone suspend-resume
during system-wide transitions:
1. The resume code runs in a PM notifier which is invoked after user
space has been thawed, so it can run concurrently with user space
which can trigger a thermal zone device removal. If that happens,
the thermal zone resume code may use a stale pointer to the next
list element and crash, because it does not hold thermal_list_lock
while walking thermal_tz_list.
2. The thermal zone resume code calls thermal_zone_device_init()
outside the zone lock, so user space or an update triggered by
the platform firmware may see an inconsistent state of a
thermal zone leading to unexpected behavior.
3. Clearing the in_suspend global variable in thermal_pm_notify()
allows __thermal_zone_device_update() to continue for all thermal
zones and it may as well run before the thermal_tz_list walk (or
at any point during the list walk for that matter) and attempt to
operate on a thermal zone that has not been resumed yet. It may
also race destructively with thermal_zone_device_init().
To address these issues, add thermal_list_lock locking to
thermal_pm_notify(), especially arount the thermal_tz_list,
make it call thermal_zone_device_init() back-to-back with
__thermal_zone_device_update() under the zone lock and replace
in_suspend with per-zone bool "suspend" indicators set and unset
under the given zone's lock.
Link: https://lore.kernel.org/linux-pm/[email protected]/
Reported-by: Bo Ye <[email protected]>
Signed-off-by: Rafael J. Wysocki <[email protected]>
-rw-r--r-- | drivers/thermal/thermal_core.c | 30 | ||||
-rw-r--r-- | include/linux/thermal.h | 2 |
2 files changed, 25 insertions, 7 deletions
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 5e5fcbd81dda..7456335efaaa 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -37,8 +37,6 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); -static atomic_t in_suspend; - static struct thermal_governor *def_governor; /* @@ -431,7 +429,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz, { struct thermal_trip *trip; - if (atomic_read(&in_suspend)) + if (tz->suspended) return; if (!thermal_zone_device_is_enabled(tz)) @@ -1542,17 +1540,35 @@ static int thermal_pm_notify(struct notifier_block *nb, case PM_HIBERNATION_PREPARE: case PM_RESTORE_PREPARE: case PM_SUSPEND_PREPARE: - atomic_set(&in_suspend, 1); + mutex_lock(&thermal_list_lock); + + list_for_each_entry(tz, &thermal_tz_list, node) { + mutex_lock(&tz->lock); + + tz->suspended = true; + + mutex_unlock(&tz->lock); + } + + mutex_unlock(&thermal_list_lock); break; case PM_POST_HIBERNATION: case PM_POST_RESTORE: case PM_POST_SUSPEND: - atomic_set(&in_suspend, 0); + mutex_lock(&thermal_list_lock); + list_for_each_entry(tz, &thermal_tz_list, node) { + mutex_lock(&tz->lock); + + tz->suspended = false; + thermal_zone_device_init(tz); - thermal_zone_device_update(tz, - THERMAL_EVENT_UNSPECIFIED); + __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + + mutex_unlock(&tz->lock); } + + mutex_unlock(&thermal_list_lock); break; default: break; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 09f6eb82c191..d00622b64d50 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -152,6 +152,7 @@ struct thermal_cooling_device { * @node: node in thermal_tz_list (in thermal_core.c) * @poll_queue: delayed work for polling * @notify_event: Last notification event + * @suspended: thermal zone suspend indicator */ struct thermal_zone_device { int id; @@ -185,6 +186,7 @@ struct thermal_zone_device { struct list_head node; struct delayed_work poll_queue; enum thermal_notify_event notify_event; + bool suspended; }; /** |