-rw-r--r--Documentation/admin-guide/kernel-parameters.txt16
-rw-r--r--Documentation/admin-guide/pm/amd-pstate.rst59
-rw-r--r--Documentation/power/opp.rst2
-rw-r--r--Documentation/power/pci.rst2
-rw-r--r--Documentation/power/runtime_pm.rst23
-rw-r--r--Documentation/translations/zh_CN/power/opp.rst2
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/kernel/acpi/cstate.c4
-rw-r--r--drivers/accel/ivpu/ivpu_pm.c2
-rw-r--r--drivers/acpi/cppc_acpi.c13
-rw-r--r--drivers/acpi/processor_driver.c6
-rw-r--r--drivers/base/power/main.c267
-rw-r--r--drivers/base/power/runtime.c36
-rw-r--r--drivers/base/power/wakeirq.c4
-rw-r--r--drivers/cpufreq/Kconfig.arm1
-rw-r--r--drivers/cpufreq/amd-pstate.c200
-rw-r--r--drivers/cpufreq/brcmstb-avs-cpufreq.c2
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c1
-rw-r--r--drivers/cpufreq/cpufreq.c32
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c1
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c45
-rw-r--r--drivers/cpufreq/intel_pstate.c49
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c19
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c26
-rw-r--r--drivers/cpuidle/driver.c3
-rw-r--r--drivers/cpuidle/governors/haltpoll.c9
-rw-r--r--drivers/firmware/arm_scmi/driver.c5
-rw-r--r--drivers/firmware/arm_scmi/perf.c53
-rw-r--r--drivers/firmware/arm_scmi/powercap.c12
-rw-r--r--drivers/firmware/arm_scmi/protocols.h4
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c5
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c2
-rw-r--r--drivers/idle/intel_idle.c3
-rw-r--r--drivers/media/i2c/ccs/ccs-core.c2
-rw-r--r--drivers/media/i2c/ov64a40.c2
-rw-r--r--drivers/media/i2c/thp7312.c2
-rw-r--r--drivers/net/ipa/ipa_smp2p.c2
-rw-r--r--drivers/pci/pci.c2
-rw-r--r--drivers/powercap/dtpm.c2
-rw-r--r--drivers/powercap/dtpm_cpu.c2
-rw-r--r--drivers/powercap/intel_rapl_common.c36
-rw-r--r--drivers/powercap/intel_rapl_msr.c8
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c15
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c8
-rw-r--r--include/acpi/cppc_acpi.h5
-rw-r--r--include/linux/amd-pstate.h10
-rw-r--r--include/linux/cpufreq.h20
-rw-r--r--include/linux/intel_rapl.h6
-rw-r--r--include/linux/pm.h30
-rw-r--r--include/linux/pm_runtime.h30
-rw-r--r--include/linux/scmi_protocol.h8
-rw-r--r--include/linux/suspend.h74
-rw-r--r--include/trace/events/rpm.h42
-rw-r--r--kernel/power/Kconfig26
-rw-r--r--kernel/power/hibernate.c107
-rw-r--r--kernel/power/main.c182
-rw-r--r--kernel/power/power.h23
-rw-r--r--kernel/power/snapshot.c25
-rw-r--r--kernel/power/suspend.c9
-rw-r--r--kernel/power/swap.c197
-rw-r--r--kernel/power/user.c4
-rw-r--r--sound/hda/hdac_device.c2
62 files changed, 1268 insertions, 526 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 31b3a25680d0..e0f51ebdb225 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -374,6 +374,11 @@
selects a performance level in this range and appropriate
to the current workload.
+ amd_prefcore=
+ [X86]
+ disable
+ Disable amd-pstate preferred core.
+
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
Format: <a>,<b>
@@ -1748,6 +1753,17 @@
(that will set all pages holding image data
during restoration read-only).
+ hibernate.compressor= [HIBERNATION] Compression algorithm to be
+ used with hibernation.
+ Format: { lzo | lz4 }
+ Default: lzo
+
+ lzo: Select LZO compression algorithm to
+ compress/decompress hibernation image.
+
+ lz4: Select LZ4 compression algorithm to
+ compress/decompress hibernation image.
+
highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
size of <nn>. This works even on boxes that have no
highmem otherwise. This also works to reduce highmem
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 9eb26014d34b..1e0d101b020a 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -300,8 +300,8 @@ platforms. The AMD P-States mechanism is the more performance and energy
efficiency frequency management method on AMD processors.
-AMD Pstate Driver Operation Modes
-=================================
+``amd-pstate`` Driver Operation Modes
+======================================
``amd_pstate`` CPPC has 3 operation modes: autonomous (active) mode,
non-autonomous (passive) mode and guided autonomous (guided) mode.
@@ -353,6 +353,48 @@ is activated. In this mode, driver requests minimum and maximum performance
level and the platform autonomously selects a performance level in this range
and appropriate to the current workload.
+``amd-pstate`` Preferred Core
+=================================
+
+The core frequency is subject to process variation in semiconductors.
+Not all cores are able to reach the maximum frequency while respecting the
+infrastructure limits. Consequently, AMD has redefined the concept of the
+maximum frequency of a part: only a fraction of cores can reach it. To find
+the best process scheduling policy for a given scenario, the OS needs to know
+the core ordering that the platform reports through the highest performance
+capability register of the CPPC interface.
+
+``amd-pstate`` preferred core enables the scheduler to prefer scheduling on
+cores that can achieve a higher frequency with lower voltage. The preferred
+core rankings can dynamically change based on the workload, platform conditions,
+thermals and ageing.
+
+The priority metric will be initialized by the ``amd-pstate`` driver. The ``amd-pstate``
+driver will also determine whether or not ``amd-pstate`` preferred core is
+supported by the platform.
+
+The ``amd-pstate`` driver provides an initial core ordering when the system boots.
+The platform uses the CPPC interfaces to communicate the core ranking to the
+operating system and scheduler, so that the OS schedules processes on the
+highest-performance cores first. When the ``amd-pstate`` driver receives a
+notification that the highest performance has changed, it updates the core
+ranking and sets the CPU's priority accordingly.
+
+``amd-pstate`` Preferred Core Switch
+=====================================
+Kernel Parameters
+-----------------
+
+``amd-pstate`` preferred core has two states: enabled and disabled.
+The state can be chosen with a kernel parameter.
+``amd-pstate`` preferred core is enabled by default.
+
+``amd_prefcore=disable``
+
+For systems that support ``amd-pstate`` preferred core, the core rankings will
+always be advertised by the platform. But the OS can choose to ignore them via
+the kernel parameter ``amd_prefcore=disable``.
+
User Space Interface in ``sysfs`` - General
===========================================
@@ -385,6 +427,19 @@ control its functionality at the system level. They are located in the
to the operation mode represented by that string - or to be
unregistered in the "disable" case.
+``prefcore``
+ Preferred core state of the driver: "enabled" or "disabled".
+
+ "enabled"
+ Enable the ``amd-pstate`` preferred core.
+
+ "disabled"
+ Disable the ``amd-pstate`` preferred core.
+
+
+ This attribute is read-only; it reports the preferred core state set
+ by the kernel parameter.
+
``cpupower`` tool support for ``amd-pstate``
===============================================
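For illustration, a small userspace sketch (not part of the documentation) that reads the global ``prefcore`` attribute described above together with the per-policy attributes exposed by the driver; the sysfs paths assume the standard ``amd-pstate`` and cpufreq layout, and ``policy0`` is only an example:

    #include <stdio.h>

    static void show(const char *path)
    {
        char buf[32];
        FILE *f = fopen(path, "r");

        if (f && fgets(buf, sizeof(buf), f))
            printf("%s: %s", path, buf);
        if (f)
            fclose(f);
    }

    int main(void)
    {
        show("/sys/devices/system/cpu/amd_pstate/prefcore");
        show("/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_prefcore_ranking");
        show("/sys/devices/system/cpu/cpufreq/policy0/amd_pstate_hw_prefcore");
        return 0;
    }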
diff --git a/Documentation/power/opp.rst b/Documentation/power/opp.rst
index a7c03c470980..1b7f1d854f14 100644
--- a/Documentation/power/opp.rst
+++ b/Documentation/power/opp.rst
@@ -305,7 +305,7 @@ dev_pm_opp_get_opp_count
{
/* Do things */
num_available = dev_pm_opp_get_opp_count(dev);
- speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+ speeds = kcalloc(num_available, sizeof(u32), GFP_KERNEL);
/* populate the table in increasing order */
freq = 0;
while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
diff --git a/Documentation/power/pci.rst b/Documentation/power/pci.rst
index a125544b4cb6..12070320307e 100644
--- a/Documentation/power/pci.rst
+++ b/Documentation/power/pci.rst
@@ -625,7 +625,7 @@ The PCI subsystem-level callbacks they correspond to::
pci_pm_poweroff()
pci_pm_poweroff_noirq()
-work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively,
+work in analogy with pci_pm_suspend() and pci_pm_suspend_noirq(), respectively,
although they don't attempt to save the device's standard configuration
registers.
diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index 65b86e487afe..5c4e730f38d0 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -154,7 +154,7 @@ suspending the device are satisfied) and to queue up a suspend request for the
device in that case. If there is no idle callback, or if the callback returns
0, then the PM core will attempt to carry out a runtime suspend of the device,
also respecting devices configured for autosuspend. In essence this means a
-call to pm_runtime_autosuspend() (do note that drivers needs to update the
+call to __pm_runtime_autosuspend() (do note that drivers need to update the
device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
this circumstance). To prevent this (for example, if the callback routine has
started a delayed suspend), the routine must return a non-zero value. Negative
@@ -396,10 +396,9 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
nonzero, increment the counter and return 1; otherwise return 0 without
changing the counter
- `int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count);`
+ `int pm_runtime_get_if_active(struct device *dev);`
- return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
- runtime PM status is RPM_ACTIVE, and either ign_usage_count is true
- or the device's usage_count is non-zero, increment the counter and
+ runtime PM status is RPM_ACTIVE, increment the counter and
return 1; otherwise return 0 without changing the counter
`void pm_runtime_put_noidle(struct device *dev);`
@@ -410,6 +409,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
pm_request_idle(dev) and return its result
`int pm_runtime_put_autosuspend(struct device *dev);`
+ - does the same as __pm_runtime_put_autosuspend() for now, but in the
+ future it will also call pm_runtime_mark_last_busy(), DO NOT USE!
+
+ `int __pm_runtime_put_autosuspend(struct device *dev);`
- decrement the device's usage counter; if the result is 0 then run
pm_request_autosuspend(dev) and return its result
@@ -540,6 +543,7 @@ It is safe to execute the following helper functions from interrupt context:
- pm_runtime_put_noidle()
- pm_runtime_put()
- pm_runtime_put_autosuspend()
+- __pm_runtime_put_autosuspend()
- pm_runtime_enable()
- pm_suspend_ignore_children()
- pm_runtime_set_active()
@@ -730,6 +734,7 @@ out the following operations:
for it, respectively.
7. Generic subsystem callbacks
+==============================
Subsystems may wish to conserve code space by using the set of generic power
management callbacks provided by the PM core, defined in
@@ -865,9 +870,9 @@ automatically be delayed until the desired period of inactivity has elapsed.
Inactivity is determined based on the power.last_busy field. Drivers should
call pm_runtime_mark_last_busy() to update this field after carrying out I/O,
-typically just before calling pm_runtime_put_autosuspend(). The desired length
-of the inactivity period is a matter of policy. Subsystems can set this length
-initially by calling pm_runtime_set_autosuspend_delay(), but after device
+typically just before calling __pm_runtime_put_autosuspend(). The desired
+length of the inactivity period is a matter of policy. Subsystems can set this
+length initially by calling pm_runtime_set_autosuspend_delay(), but after device
registration the length should be controlled by user space, using the
/sys/devices/.../power/autosuspend_delay_ms attribute.
@@ -878,7 +883,7 @@ instead of the non-autosuspend counterparts::
Instead of: pm_runtime_suspend use: pm_runtime_autosuspend;
Instead of: pm_schedule_suspend use: pm_request_autosuspend;
- Instead of: pm_runtime_put use: pm_runtime_put_autosuspend;
+ Instead of: pm_runtime_put use: __pm_runtime_put_autosuspend;
Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend.
Drivers may also continue to use the non-autosuspend helper functions; they
@@ -917,7 +922,7 @@ Here is a schematic pseudo-code example::
lock(&foo->private_lock);
if (--foo->num_pending_requests == 0) {
pm_runtime_mark_last_busy(&foo->dev);
- pm_runtime_put_autosuspend(&foo->dev);
+ __pm_runtime_put_autosuspend(&foo->dev);
} else {
foo_process_next_request(foo);
}
diff --git a/Documentation/translations/zh_CN/power/opp.rst b/Documentation/translations/zh_CN/power/opp.rst
index 8d6e3f6f6202..7470fa2d4c43 100644
--- a/Documentation/translations/zh_CN/power/opp.rst
+++ b/Documentation/translations/zh_CN/power/opp.rst
@@ -274,7 +274,7 @@ dev_pm_opp_get_opp_count
{
/* 做一些事情 */
num_available = dev_pm_opp_get_opp_count(dev);
- speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+ speeds = kcalloc(num_available, sizeof(u32), GFP_KERNEL);
/* 按升序填充表 */
freq = 0;
while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..29d110285438 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1054,8 +1054,9 @@ config SCHED_MC
config SCHED_MC_PRIO
bool "CPU core priorities scheduler support"
- depends on SCHED_MC && CPU_SUP_INTEL
- select X86_INTEL_PSTATE
+ depends on SCHED_MC
+ select X86_INTEL_PSTATE if CPU_SUP_INTEL
+ select X86_AMD_PSTATE if CPU_SUP_AMD && ACPI
select CPU_FREQ
default y
help
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 401808b47af3..f3ffd0a3a012 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -131,8 +131,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
/* Check whether this particular cx_type (in CST) is supported or not */
- cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
- MWAIT_CSTATE_MASK) + 1;
+ cstate_type = (((cx->address >> MWAIT_SUBSTATE_SIZE) &
+ MWAIT_CSTATE_MASK) + 1) & MWAIT_CSTATE_MASK;
edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
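For reference, a standalone sketch of the nibble arithmetic in this hunk (the MWAIT_* constants mirror the asm/mwait.h values, which is an assumption here); it shows how the added mask keeps cstate_type within the 4-bit range, so a _CST entry whose C-state field is already 0xf now yields 0 instead of 16 and the EDX shift count stays bounded:

    #include <stdio.h>
    #include <stdint.h>

    #define MWAIT_SUBSTATE_SIZE 4      /* assumed to match asm/mwait.h */
    #define MWAIT_SUBSTATE_MASK 0xf
    #define MWAIT_CSTATE_MASK   0xf

    int main(void)
    {
        uint32_t edx = 0x00001120;     /* example CPUID.5 EDX sub-state table */
        uint32_t address = 0xf0;       /* bogus hint: C-state field is 0xf */
        uint32_t cstate_type, edx_part;

        /* Without the trailing mask this would be 16 and the shift 64. */
        cstate_type = (((address >> MWAIT_SUBSTATE_SIZE) &
                        MWAIT_CSTATE_MASK) + 1) & MWAIT_CSTATE_MASK;

        edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
        printf("cstate_type=%u num_cstate_subtype=%u\n",
               cstate_type, edx_part & MWAIT_SUBSTATE_MASK);
        return 0;
    }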
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index 5f73854234ba..814882ae5059 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -309,7 +309,7 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev)
{
int ret;
- ret = pm_runtime_get_if_active(vdev->drm.dev, false);
+ ret = pm_runtime_get_if_in_use(vdev->drm.dev);
drm_WARN_ON(&vdev->drm, ret < 0);
return ret;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index d155a86a8614..a50e70abdf19 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1158,6 +1158,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
}
/**
+ * cppc_get_highest_perf - Get the highest performance register value.
+ * @cpunum: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_highest_perf);
+
+/**
* cppc_get_epp_perf - Get the epp register value.
* @cpunum: CPU from which to get epp preference value.
* @epp_perf: Return address.
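For context, a minimal in-kernel caller sketch for the helper added above (not part of the patch; the consuming function and its fallback behaviour are illustrative only):

    #include <acpi/cppc_acpi.h>

    /* Illustrative only: fetch the CPPC highest-performance value for @cpu
     * and treat a failed read as "no ranking available". */
    static u64 example_read_highest_perf(int cpu)
    {
        u64 highest_perf;

        if (cppc_get_highest_perf(cpu, &highest_perf))
            return 0;

        return highest_perf;
    }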
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 4bd16b3f0781..67db60eda370 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -27,6 +27,7 @@
#define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
#define ACPI_PROCESSOR_NOTIFY_POWER 0x81
#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82
+#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85
MODULE_AUTHOR("Paul Diefenbaugh");
MODULE_DESCRIPTION("ACPI Processor Driver");
@@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data)
acpi_bus_generate_netlink_event(device->pnp.device_class,
dev_name(&device->dev), event, 0);
break;
+ case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED:
+ cpufreq_update_limits(pr->id);
+ acpi_bus_generate_netlink_event(device->pnp.device_class,
+ dev_name(&device->dev), event, 0);
+ break;
default:
acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event);
break;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index fadcd0379dc2..5679f966f676 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -60,7 +60,6 @@ static LIST_HEAD(dpm_suspended_list);
static LIST_HEAD(dpm_late_early_list);
static LIST_HEAD(dpm_noirq_list);
-struct suspend_stats suspend_stats;
static DEFINE_MUTEX(dpm_list_mtx);
static pm_message_t pm_transition;
@@ -578,6 +577,35 @@ bool dev_pm_skip_resume(struct device *dev)
return !dev->power.must_resume;
}
+static bool is_async(struct device *dev)
+{
+ return dev->power.async_suspend && pm_async_enabled
+ && !pm_trace_is_enabled();
+}
+
+static bool dpm_async_fn(struct device *dev, async_func_t func)
+{
+ reinit_completion(&dev->power.completion);
+
+ if (is_async(dev)) {
+ dev->power.async_in_progress = true;
+
+ get_device(dev);
+
+ if (async_schedule_dev_nocall(func, dev))
+ return true;
+
+ put_device(dev);
+ }
+ /*
+ * Because async_schedule_dev_nocall() above has returned false or it
+ * has not been called at all, func() is not running and it is safe to
+ * update the async_in_progress flag without extra synchronization.
+ */
+ dev->power.async_in_progress = false;
+ return false;
+}
+
/**
* device_resume_noirq - Execute a "noirq resume" callback for given device.
* @dev: Device to handle.
@@ -657,42 +685,12 @@ Out:
TRACE_RESUME(error);
if (error) {
- suspend_stats.failed_resume_noirq++;
- dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
+ async_error = error;
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
}
}
-static bool is_async(struct device *dev)
-{
- return dev->power.async_suspend && pm_async_enabled
- && !pm_trace_is_enabled();
-}
-
-static bool dpm_async_fn(struct device *dev, async_func_t func)
-{
- reinit_completion(&dev->power.completion);
-
- if (is_async(dev)) {
- dev->power.async_in_progress = true;
-
- get_device(dev);
-
- if (async_schedule_dev_nocall(func, dev))
- return true;
-
- put_device(dev);
- }
- /*
- * Because async_schedule_dev_nocall() above has returned false or it
- * has not been called at all, func() is not running and it is safe to
- * update the async_in_progress flag without extra synchronization.
- */
- dev->power.async_in_progress = false;
- return false;
-}
-
static void async_resume_noirq(void *data, async_cookie_t cookie)
{
struct device *dev = data;
@@ -707,9 +705,12 @@ static void dpm_noirq_resume_devices(pm_message_t state)
ktime_t starttime = ktime_get();
trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true);
- mutex_lock(&dpm_list_mtx);
+
+ async_error = 0;
pm_transition = state;
+ mutex_lock(&dpm_list_mtx);
+
/*
* Trigger the resume of "async" devices upfront so they don't have to
* wait for the "non-async" ones they don't depend on.
@@ -736,6 +737,9 @@ static void dpm_noirq_resume_devices(pm_message_t state)
mutex_unlock(&dpm_list_mtx);
async_synchronize_full();
dpm_show_time(starttime, state, 0, "noirq");
+ if (async_error)
+ dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
+
trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false);
}
@@ -817,8 +821,7 @@ Out:
complete_all(&dev->power.completion);
if (error) {
- suspend_stats.failed_resume_early++;
- dpm_save_failed_step(SUSPEND_RESUME_EARLY);
+ async_error = error;
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async early" : " early", error);
}
@@ -842,9 +845,12 @@ void dpm_resume_early(pm_message_t state)
ktime_t starttime = ktime_get();
trace_suspend_resume(TPS("dpm_resume_early"), state.event, true);
- mutex_lock(&dpm_list_mtx);
+
+ async_error = 0;
pm_transition = state;
+ mutex_lock(&dpm_list_mtx);
+
/*
* Trigger the resume of "async" devices upfront so they don't have to
* wait for the "non-async" ones they don't depend on.
@@ -871,6 +877,9 @@ void dpm_resume_early(pm_message_t state)
mutex_unlock(&dpm_list_mtx);
async_synchronize_full();
dpm_show_time(starttime, state, 0, "early");
+ if (async_error)
+ dpm_save_failed_step(SUSPEND_RESUME_EARLY);
+
trace_suspend_resume(TPS("dpm_resume_early"), state.event, false);
}
@@ -974,8 +983,7 @@ static void device_resume(struct device *dev, pm_message_t state, bool async)
TRACE_RESUME(error);
if (error) {
- suspend_stats.failed_resume++;
- dpm_save_failed_step(SUSPEND_RESUME);
+ async_error = error;
dpm_save_failed_dev(dev_name(dev));
pm_dev_err(dev, state, async ? " async" : "", error);
}
@@ -1004,10 +1012,11 @@ void dpm_resume(pm_message_t state)
trace_suspend_resume(TPS("dpm_resume"), state.event, true);
might_sleep();
- mutex_lock(&dpm_list_mtx);
pm_transition = state;
async_error = 0;
+ mutex_lock(&dpm_list_mtx);
+
/*
* Trigger the resume of "async" devices upfront so they don't have to
* wait for the "non-async" ones they don't depend on.
@@ -1017,29 +1026,25 @@ void dpm_resume(pm_message_t state)
while (!list_empty(&dpm_suspended_list)) {
dev = to_device(dpm_suspended_list.next);
-
- get_device(dev);
+ list_move_tail(&dev->power.entry, &dpm_prepared_list);
if (!dev->power.async_in_progress) {
+ get_device(dev);
+
mutex_unlock(&dpm_list_mtx);
device_resume(dev, state, false);
+ put_device(dev);
+
mutex_lock(&dpm_list_mtx);
}
-
- if (!list_empty(&dev->power.entry))
- list_move_tail(&dev->power.entry, &dpm_prepared_list);
-
- mutex_unlock(&dpm_list_mtx);
-
- put_device(dev);
-
- mutex_lock(&dpm_list_mtx);
}
mutex_unlock(&dpm_list_mtx);
async_synchronize_full();
dpm_show_time(starttime, state, 0, NULL);
+ if (async_error)
+ dpm_save_failed_step(SUSPEND_RESUME);
cpufreq_resume();
devfreq_resume();
@@ -1187,7 +1192,7 @@ static void dpm_superior_set_must_resume(struct device *dev)
}
/**
- * __device_suspend_noirq - Execute a "noirq suspend" callback for given device.
+ * device_suspend_noirq - Execute a "noirq suspend" callback for given device.
* @dev: Device to handle.
* @state: PM transition of the system being carried out.
* @async: If true, the device is being suspended asynchronously.
@@ -1195,7 +1200,7 @@ static void dpm_superior_set_must_resume(struct device *dev)
* The driver of @dev will not receive interrupts while this function is being
* executed.
*/
-static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
+static int device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
{
pm_callback_t callback = NULL;
const char *info = NULL;
@@ -1240,6 +1245,8 @@ Run:
error = dpm_run_callback(callback, dev, state, info);
if (error) {
async_error = error;
+ dpm_save_failed_dev(dev_name(dev));
+ pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
goto Complete;
}
@@ -1269,54 +1276,37 @@ Complete:
static void async_suspend_noirq(void *data, async_cookie_t cookie)
{
struct device *dev = data;
- int error;
-
- error = __device_suspend_noirq(dev, pm_transition, true);
- if (error) {
- dpm_save_failed_dev(dev_name(dev));
- pm_dev_err(dev, pm_transition, " async", error);
- }
+ device_suspend_noirq(dev, pm_transition, true);
put_device(dev);
}
-static int device_suspend_noirq(struct device *dev)
-{
- if (dpm_async_fn(dev, async_suspend_noirq))
- return 0;
-
- return __device_suspend_noirq(dev, pm_transition, false);
-}
-
static int dpm_noirq_suspend_devices(pm_message_t state)
{
ktime_t starttime = ktime_get();
int error = 0;
trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true);
- mutex_lock(&dpm_list_mtx);
+
pm_transition = state;
async_error = 0;
+ mutex_lock(&dpm_list_mtx);
+
while (!list_empty(&dpm_late_early_list)) {
struct device *dev = to_device(dpm_late_early_list.prev);
- get_device(dev);
- mutex_unlock(&dpm_list_mtx);
-
- error = device_suspend_noirq(dev);
+ list_move(&dev->power.entry, &dpm_noirq_list);
- mutex_lock(&dpm_list_mtx);
+ if (dpm_async_fn(dev, async_suspend_noirq))
+ continue;
- if (error) {
- pm_dev_err(dev, state, " noirq", error);
- dpm_save_failed_dev(dev_name(dev));
- } else if (!list_empty(&dev->power.entry)) {
- list_move(&dev->power.entry, &dpm_noirq_list);
- }
+ get_device(dev);
mutex_unlock(&dpm_list_mtx);
+ error = device_suspend_noirq(dev, state, false);
+
put_device(dev);
mutex_lock(&dpm_list_mtx);
@@ -1324,15 +1314,16 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
if (error || async_error)
break;
}
+
mutex_unlock(&dpm_list_mtx);
+
async_synchronize_full();
if (!error)
error = async_error;
- if (error) {
- suspend_stats.failed_suspend_noirq++;
+ if (error)
dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ);
- }
+
dpm_show_time(starttime, state, error, "noirq");
trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false);
return error;
@@ -1375,14 +1366,14 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev)
}
/**
- * __device_suspend_late - Execute a "late suspend" callback for given device.
+ * device_suspend_late - Execute a "late suspend" callback for given device.
* @dev: Device to handle.
* @state: PM transition of the system being carried out.
* @async: If true, the device is being suspended asynchronously.
*
* Runtime PM is disabled for @dev while this function is being executed.
*/
-static int __device_suspend_late(struct device *dev, pm_message_t state, bool async)
+static int device_suspend_late(struct device *dev, pm_message_t state, bool async)
{
pm_callback_t callback = NULL;
const char *info = NULL;
@@ -1434,6 +1425,8 @@ Run:
error = dpm_run_callback(callback, dev, state, info);
if (error) {
async_error = error;
+ dpm_save_failed_dev(dev_name(dev));
+ pm_dev_err(dev, state, async ? " async late" : " late", error);
goto Complete;
}
dpm_propagate_wakeup_to_parent(dev);
@@ -1450,24 +1443,11 @@ Complete:
static void async_suspend_late(void *data, async_cookie_t cookie)
{
struct device *dev = data;
- int error;
- error = __device_suspend_late(dev, pm_transition, true);
- if (error) {
- dpm_save_failed_dev(dev_name(dev));
- pm_dev_err(dev, pm_transition, " async", error);
- }
+ device_suspend_late(dev, pm_transition, true);
put_device(dev);
}
-static int device_suspend_late(struct device *dev)
-{
- if (dpm_async_fn(dev, async_suspend_late))
- return 0;
-
- return __device_suspend_late(dev, pm_transition, false);
-}
-
/**
* dpm_suspend_late - Execute "late suspend" callbacks for all devices.
* @state: PM transition of the system being carried out.
@@ -1478,32 +1458,28 @@ int dpm_suspend_late(pm_message_t state)
int error = 0;
trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true);
- wake_up_all_idle_cpus();
- mutex_lock(&dpm_list_mtx);
+
pm_transition = state;
async_error = 0;
- while (!list_empty(&dpm_suspended_list)) {
- struct device *dev = to_device(dpm_suspended_list.prev);
-
- get_device(dev);
+ wake_up_all_idle_cpus();
- mutex_unlock(&dpm_list_mtx);
+ mutex_lock(&dpm_list_mtx);
- error = device_suspend_late(dev);
+ while (!list_empty(&dpm_suspended_list)) {
+ struct device *dev = to_device(dpm_suspended_list.prev);
- mutex_lock(&dpm_list_mtx);
+ list_move(&dev->power.entry, &dpm_late_early_list);
- if (!list_empty(&dev->power.entry))
- list_move(&dev->power.entry, &dpm_late_early_list);
+ if (dpm_async_fn(dev, async_suspend_late))
+ continue;
- if (error) {
- pm_dev_err(dev, state, " late", error);
- dpm_save_failed_dev(dev_name(dev));
- }
+ get_device(dev);
mutex_unlock(&dpm_list_mtx);
+ error = device_suspend_late(dev, state, false);
+
put_device(dev);
mutex_lock(&dpm_list_mtx);
@@ -1511,12 +1487,14 @@ int dpm_suspend_late(pm_message_t state)
if (error || async_error)
break;
}
+
mutex_unlock(&dpm_list_mtx);
+
async_synchronize_full();
if (!error)
error = async_error;
+
if (error) {
- suspend_stats.failed_suspend_late++;
dpm_save_failed_step(SUSPEND_SUSPEND_LATE);
dpm_resume_early(resume_event(state));
}
@@ -1597,12 +1575,12 @@ static void dpm_clear_superiors_direct_complete(struct device *dev)
}
/**
- * __device_suspend - Execute "suspend" callbacks for given device.
+ * device_suspend - Execute "suspend" callbacks for given device.
* @dev: Device to handle.
* @state: PM transition of the system being carried out.
* @async: If true, the device is being suspended asynchronously.
*/
-static int __device_suspend(struct device *dev, pm_message_t state, bool async)
+static int device_suspend(struct device *dev, pm_message_t state, bool async)
{
pm_callback_t callback = NULL;
const char *info = NULL;
@@ -1716,8 +1694,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
dpm_watchdog_clear(&wd);
Complete:
- if (error)
+ if (error) {
async_error = error;
+ dpm_save_failed_dev(dev_name(dev));
+ pm_dev_err(dev, state, async ? " async" : "", error);
+ }
complete_all(&dev->power.completion);
TRACE_SUSPEND(error);
@@ -1727,25 +1708,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
static void async_suspend(void *data, async_cookie_t cookie)
{
struct device *dev = data;
- int error;
-
- error = __device_suspend(dev, pm_transition, true);
- if (error) {
- dpm_save_failed_dev(dev_name(dev));
- pm_dev_err(dev, pm_transition, " async", error);
- }
+ device_suspend(dev, pm_transition, true);
put_device(dev);
}
-static int device_suspend(struct device *dev)
-{
- if (dpm_async_fn(dev, async_suspend))
- return 0;
-
- return __device_suspend(dev, pm_transition, false);
-}
-
/**
* dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices.
* @state: PM transition of the system being carried out.
@@ -1761,29 +1728,25 @@ int dpm_suspend(pm_message_t state)
devfreq_suspend();
cpufreq_suspend();
- mutex_lock(&dpm_list_mtx);
pm_transition = state;
async_error = 0;
- while (!list_empty(&dpm_prepared_list)) {
- struct device *dev = to_device(dpm_prepared_list.prev);
- get_device(dev);
+ mutex_lock(&dpm_list_mtx);
- mutex_unlock(&dpm_list_mtx);
+ while (!list_empty(&dpm_prepared_list)) {
+ struct device *dev = to_device(dpm_prepared_list.prev);
- error = device_suspend(dev);
+ list_move(&dev->power.entry, &dpm_suspended_list);
- mutex_lock(&dpm_list_mtx);
+ if (dpm_async_fn(dev, async_suspend))
+ continue;
- if (error) {
- pm_dev_err(dev, state, "", error);
- dpm_save_failed_dev(dev_name(dev));
- } else if (!list_empty(&dev->power.entry)) {
- list_move(&dev->power.entry, &dpm_suspended_list);
- }
+ get_device(dev);
mutex_unlock(&dpm_list_mtx);
+ error = device_suspend(dev, state, false);
+
put_device(dev);
mutex_lock(&dpm_list_mtx);
@@ -1791,14 +1754,16 @@ int dpm_suspend(pm_message_t state)
if (error || async_error)
break;
}
+
mutex_unlock(&dpm_list_mtx);
+
async_synchronize_full();
if (!error)
error = async_error;
- if (error) {
- suspend_stats.failed_suspend++;
+
+ if (error)
dpm_save_failed_step(SUSPEND_SUSPEND);
- }
+
dpm_show_time(starttime, state, error, NULL);
trace_suspend_resume(TPS("dpm_suspend"), state.event, false);
return error;
@@ -1949,11 +1914,11 @@ int dpm_suspend_start(pm_message_t state)
int error;
error = dpm_prepare(state);
- if (error) {
- suspend_stats.failed_prepare++;
+ if (error)
dpm_save_failed_step(SUSPEND_PREPARE);
- } else
+ else
error = dpm_suspend(state);
+
dpm_show_time(starttime, state, error, "start");
return error;
}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 05793c9fbb84..2ee45841486b 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -94,6 +94,7 @@ static void update_pm_runtime_accounting(struct device *dev)
static void __update_runtime_status(struct device *dev, enum rpm_status status)
{
update_pm_runtime_accounting(dev);
+ trace_rpm_status(dev, status);
dev->power.runtime_status = status;
}
@@ -1176,7 +1177,7 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
EXPORT_SYMBOL_GPL(__pm_runtime_resume);
/**
- * pm_runtime_get_if_active - Conditionally bump up device usage counter.
+ * pm_runtime_get_conditional - Conditionally bump up device usage counter.
* @dev: Device to handle.
* @ign_usage_count: Whether or not to look at the current usage counter value.
*
@@ -1197,7 +1198,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume);
* The caller is responsible for decrementing the runtime PM usage counter of
* @dev after this function has returned a positive value for it.
*/
-int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count)
+static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
{
unsigned long flags;
int retval;
@@ -1218,9 +1219,40 @@ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count)
return retval;
}
+
+/**
+ * pm_runtime_get_if_active - Bump up runtime PM usage counter if the device is
+ * in active state
+ * @dev: Target device.
+ *
+ * Increment the runtime PM usage counter of @dev if its runtime PM status is
+ * %RPM_ACTIVE, in which case it returns 1. If the device is in a different
+ * state, 0 is returned. -EINVAL is returned if runtime PM is disabled for the
+ * device, in which case also the usage_count will remain unmodified.
+ */
+int pm_runtime_get_if_active(struct device *dev)
+{
+ return pm_runtime_get_conditional(dev, true);
+}
EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);
/**
+ * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
+ * @dev: Target device.
+ *
+ * Increment the runtime PM usage counter of @dev if its runtime PM status is
+ * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case
+ * it returns 1. If the device is in a different state or its usage_count is 0,
+ * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device,
+ * in which case also the usage_count will remain unmodified.
+ */
+int pm_runtime_get_if_in_use(struct device *dev)
+{
+ return pm_runtime_get_conditional(dev, false);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use);
+
+/**
* __pm_runtime_set_status - Set runtime PM status of a device.
* @dev: Device to handle.
* @status: New runtime PM status of the device.
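As a usage illustration for the reworked conditional helpers (not part of the patch; the device handling around the register access is a placeholder):

    #include <linux/pm_runtime.h>

    /* Touch the hardware only if it is currently powered up.  A positive
     * return from pm_runtime_get_if_active() means the usage counter was
     * bumped, so the device stays RPM_ACTIVE until we drop the reference.
     * Use pm_runtime_get_if_in_use() instead when the access should also
     * require an existing user (usage_count > 0). */
    static int example_poll_if_powered(struct device *dev)
    {
        int ret;

        ret = pm_runtime_get_if_active(dev);
        if (ret <= 0)
            return ret;     /* 0: not active, -EINVAL: runtime PM disabled */

        /* ... read registers here ... */

        pm_runtime_put(dev);
        return 0;
    }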
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 42171f766dcb..5a5a9e978e85 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -313,8 +313,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev)
return;
if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED &&
- wirq->status & WAKE_IRQ_DEDICATED_REVERSE)
+ wirq->status & WAKE_IRQ_DEDICATED_REVERSE) {
enable_irq(wirq->irq);
+ wirq->status |= WAKE_IRQ_DEDICATED_ENABLED;
+ }
}
/**
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index f911606897b8..a0ebad77666e 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -173,6 +173,7 @@ config ARM_QCOM_CPUFREQ_NVMEM
config ARM_QCOM_CPUFREQ_HW
tristate "QCOM CPUFreq HW driver"
depends on ARCH_QCOM || COMPILE_TEST
+ depends on COMMON_CLK
help
Support for the CPUFreq HW driver.
Some QCOM chipsets have a HW engine to offload the steps
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 1791d37fbc53..2015c9fcc3c9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/static_call.h>
#include <linux/amd-pstate.h>
+#include <linux/topology.h>
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
@@ -49,6 +50,7 @@
#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 1000
+#define AMD_PSTATE_PREFCORE_THRESHOLD 166
/*
* TODO: We need more time to fine tune processors with shared memory solution
@@ -64,6 +66,7 @@ static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
+static bool amd_pstate_prefcore = true;
/*
* AMD Energy Preference Performance (EPP)
@@ -297,13 +300,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
if (ret)
return ret;
- /*
- * TODO: Introduce AMD specific power feature.
- *
- * CPPC entry doesn't indicate the highest performance in some ASICs.
+ /* For platforms that do not support the preferred core feature, the
+ * highest_perf may be configured with 166 or 255. To avoid the max
+ * frequency being calculated incorrectly, we take the
+ * AMD_CPPC_HIGHEST_PERF(cap1) value as the default max perf.
*/
- highest_perf = amd_get_highest_perf();
- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
+ if (cpudata->hw_prefcore)
+ highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
+ else
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
WRITE_ONCE(cpudata->highest_perf, highest_perf);
@@ -311,6 +315,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
return 0;
}
@@ -324,8 +329,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
if (ret)
return ret;
- highest_perf = amd_get_highest_perf();
- if (highest_perf > cppc_perf.highest_perf)
+ if (cpudata->hw_prefcore)
+ highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD;
+ else
highest_perf = cppc_perf.highest_perf;
WRITE_ONCE(cpudata->highest_perf, highest_perf);
@@ -334,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
if (cppc_state == AMD_PSTATE_ACTIVE)
@@ -477,12 +484,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf;
+ u32 max_limit_perf, min_limit_perf, lowest_perf;
struct amd_cpudata *cpudata = policy->driver_data;
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ lowest_perf = READ_ONCE(cpudata->lowest_perf);
+ if (min_limit_perf < lowest_perf)
+ min_limit_perf = lowest_perf;
+
+ if (max_limit_perf < min_limit_perf)
+ max_limit_perf = min_limit_perf;
+
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
@@ -570,7 +584,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (target_perf < capacity)
des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
- min_perf = READ_ONCE(cpudata->highest_perf);
+ min_perf = READ_ONCE(cpudata->lowest_perf);
if (_min_perf < capacity)
min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
@@ -706,6 +720,114 @@ static void amd_perf_ctl_reset(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
+/*
+ * Enabling amd-pstate preferred core can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
+
+/*
+ * Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
+{
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 cap1;
+
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+ if (ret)
+ return ret;
+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ } else {
+ u64 cppc_highest_perf;
+
+ ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
+ if (ret)
+ return ret;
+ WRITE_ONCE(*highest_perf, cppc_highest_perf);
+ }
+
+ return (ret);
+}
+
+#define CPPC_MAX_PERF U8_MAX
+
+static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+{
+ int ret, prio;
+ u32 highest_perf;
+
+ ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
+ if (ret)
+ return;
+
+ cpudata->hw_prefcore = true;
+ /* check if CPPC preferred core feature is enabled */
+ if (highest_perf < CPPC_MAX_PERF)
+ prio = (int)highest_perf;
+ else {
+ pr_debug("AMD CPPC preferred core is unsupported!\n");
+ cpudata->hw_prefcore = false;
+ return;
+ }
+
+ if (!amd_pstate_prefcore)
+ return;
+
+ /*
+ * The priorities can be set regardless of whether or not
+ * sched_set_itmt_support(true) has been called and it is valid to
+ * update them at any time after it has been called.
+ */
+ sched_set_itmt_core_prio(prio, cpudata->cpu);
+
+ schedule_work(&sched_prefcore_work);
+}
+
+static void amd_pstate_update_limits(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u32 prev_high = 0, cur_high = 0;
+ int ret;
+ bool highest_perf_changed = false;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
+ goto free_cpufreq_put;
+
+ ret = amd_pstate_get_highest_perf(cpu, &cur_high);
+ if (ret)
+ goto free_cpufreq_put;
+
+ prev_high = READ_ONCE(cpudata->prefcore_ranking);
+ if (prev_high != cur_high) {
+ highest_perf_changed = true;
+ WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
+
+ if (cur_high < CPPC_MAX_PERF)
+ sched_set_itmt_core_prio((int)cur_high, cpu);
+ }
+
+free_cpufreq_put:
+ cpufreq_cpu_put(policy);
+
+ if (!highest_perf_changed)
+ cpufreq_update_policy(cpu);
+
+ mutex_unlock(&amd_pstate_driver_lock);
+}
+
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -727,6 +849,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
@@ -877,6 +1001,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
return sysfs_emit(buf, "%u\n", perf);
}
+static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
+ char *buf)
+{
+ u32 perf;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ perf = READ_ONCE(cpudata->prefcore_ranking);
+
+ return sysfs_emit(buf, "%u\n", perf);
+}
+
+static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
+ char *buf)
+{
+ bool hw_prefcore;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
+
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
+}
+
static ssize_t show_energy_performance_available_preferences(
struct cpufreq_policy *policy, char *buf)
{
@@ -1074,18 +1220,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
return ret < 0 ? ret : count;
}
+static ssize_t prefcore_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
+}
+
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
+cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
+static DEVICE_ATTR_RO(prefcore);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
&amd_pstate_lowest_nonlinear_freq,
&amd_pstate_highest_perf,
+ &amd_pstate_prefcore_ranking,
+ &amd_pstate_hw_prefcore,
NULL,
};
@@ -1093,6 +1250,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
&amd_pstate_max_freq,
&amd_pstate_lowest_nonlinear_freq,
&amd_pstate_highest_perf,
+ &amd_pstate_prefcore_ranking,
+ &amd_pstate_hw_prefcore,
&energy_performance_preference,
&energy_performance_available_preferences,
NULL,
@@ -1100,6 +1259,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
static struct attribute *pstate_global_attributes[] = {
&dev_attr_status.attr,
+ &dev_attr_prefcore.attr,
NULL
};
@@ -1151,6 +1311,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
@@ -1232,6 +1394,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ if (min_limit_perf < min_perf)
+ min_limit_perf = min_perf;
+
+ if (max_limit_perf < min_limit_perf)
+ max_limit_perf = min_limit_perf;
+
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -1432,6 +1600,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.suspend = amd_pstate_cpu_suspend,
.resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
+ .update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
.attr = amd_pstate_attr,
};
@@ -1446,6 +1615,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
+ .update_limits = amd_pstate_update_limits,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
@@ -1567,7 +1737,17 @@ static int __init amd_pstate_param(char *str)
return amd_pstate_set_driver(mode_idx);
}
+
+static int __init amd_prefcore_param(char *str)
+{
+ if (!strcmp(str, "disable"))
+ amd_pstate_prefcore = false;
+
+ return 0;
+}
+
early_param("amd_pstate", amd_pstate_param);
+early_param("amd_prefcore", amd_prefcore_param);
MODULE_AUTHOR("Huang Rui <[email protected]>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 35fb3a559ea9..1a1857b0a6f4 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -481,6 +481,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv)
static unsigned int brcm_avs_cpufreq_get(unsigned int cpu)
{
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ if (!policy)
+ return 0;
struct private_data *priv = policy->driver_data;
cpufreq_cpu_put(policy);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index bd1e1357cef8..b993a498084b 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -156,6 +156,7 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "qcom,sc7280", },
{ .compatible = "qcom,sc8180x", },
{ .compatible = "qcom,sc8280xp", },
+ { .compatible = "qcom,sdm670", },
{ .compatible = "qcom,sdm845", },
{ .compatible = "qcom,sdx75", },
{ .compatible = "qcom,sm6115", },
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 44db4f59c4cc..f6f8d7f450e7 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
if (latency) {
+ unsigned int max_delay_us = 2 * MSEC_PER_SEC;
+
+ /*
+ * If the platform already has high transition_latency, use it
+ * as-is.
+ */
+ if (latency > max_delay_us)
+ return latency;
+
/*
- * For platforms that can change the frequency very fast (< 10
+ * For platforms that can change the frequency very fast (< 2
* us), the above formula gives a decent transition delay. But
* for platforms where transition_latency is in milliseconds, it
* ends up giving unrealistic values.
*
- * Cap the default transition delay to 10 ms, which seems to be
+ * Cap the default transition delay to 2 ms, which seems to be
* a reasonable amount of time after which we should reevaluate
* the frequency.
*/
- return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000);
+ return min(latency * LATENCY_MULTIPLIER, max_delay_us);
}
return LATENCY_MULTIPLIER;
@@ -1571,7 +1580,8 @@ static int cpufreq_online(unsigned int cpu)
if (cpufreq_driver->ready)
cpufreq_driver->ready(policy);
- if (cpufreq_thermal_control_enabled(cpufreq_driver))
+ /* Register cpufreq cooling only for a new policy */
+ if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
policy->cdev = of_cpufreq_cooling_register(policy);
pr_debug("initialization complete\n");
@@ -1655,11 +1665,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy)
else
policy->last_policy = policy->policy;
- if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
- cpufreq_cooling_unregister(policy->cdev);
- policy->cdev = NULL;
- }
-
if (has_target())
cpufreq_exit_governor(policy);
@@ -1720,6 +1725,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
return;
}
+ /*
+ * Unregister cpufreq cooling once all the CPUs of the policy are
+ * removed.
+ */
+ if (cpufreq_thermal_control_enabled(cpufreq_driver)) {
+ cpufreq_cooling_unregister(policy->cdev);
+ policy->cdev = NULL;
+ }
+
/* We did light-weight exit earlier, do full tear down now */
if (cpufreq_driver->offline)
cpufreq_driver->exit(policy);
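A standalone model of the new transition-delay selection (LATENCY_MULTIPLIER and MSEC_PER_SEC are assumed to be 1000, matching the kernel headers); it shows that a 1 us latency still yields 1 ms, a 5 us latency is now capped at 2 ms instead of 5 ms, and a 3 ms latency is passed through unchanged:

    #include <stdio.h>

    #define LATENCY_MULTIPLIER 1000    /* assumed to match include/linux/cpufreq.h */
    #define MSEC_PER_SEC       1000U

    /* Mirrors cpufreq_policy_transition_delay_us() for a non-zero latency. */
    static unsigned int transition_delay_us(unsigned int latency_us)
    {
        unsigned int max_delay_us = 2 * MSEC_PER_SEC;    /* 2 ms */

        if (latency_us > max_delay_us)    /* already slow: use it as-is */
            return latency_us;

        return latency_us * LATENCY_MULTIPLIER < max_delay_us ?
               latency_us * LATENCY_MULTIPLIER : max_delay_us;
    }

    int main(void)
    {
        printf("%u %u %u\n", transition_delay_us(1),      /* 1000 us */
               transition_delay_us(5),                    /* capped at 2000 us */
               transition_delay_us(3000));                /* 3000 us, unchanged */
        return 0;
    }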
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c52d19d67557..a7c38b8b3e78 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -22,7 +22,6 @@
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
#define MICRO_FREQUENCY_UP_THRESHOLD (95)
-#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 33728c242f66..c20d3ecc5a81 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -14,6 +14,8 @@
#include <linux/pm_opp.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
#define PU_SOC_VOLTAGE_NORMAL 1250000
#define PU_SOC_VOLTAGE_HIGH 1275000
@@ -225,8 +227,6 @@ static void imx6x_disable_freq_in_opp(struct device *dev, unsigned long freq)
static int imx6q_opp_check_speed_grading(struct device *dev)
{
- struct device_node *np;
- void __iomem *base;
u32 val;
int ret;
@@ -235,16 +235,11 @@ static int imx6q_opp_check_speed_grading(struct device *dev)
if (ret)
return ret;
} else {
- np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp");
- if (!np)
- return -ENOENT;
+ struct regmap *ocotp;
- base = of_iomap(np, 0);
- of_node_put(np);
- if (!base) {
- dev_err(dev, "failed to map ocotp\n");
- return -EFAULT;
- }
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6q-ocotp");
+ if (IS_ERR(ocotp))
+ return -ENOENT;
/*
* SPEED_GRADING[1:0] defines the max speed of ARM:
@@ -254,8 +249,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev)
* 2b'00: 792000000Hz;
* We need to set the max speed of ARM according to fuse map.
*/
- val = readl_relaxed(base + OCOTP_CFG3);
- iounmap(base);
+ regmap_read(ocotp, OCOTP_CFG3, &val);
}
val >>= OCOTP_CFG3_SPEED_SHIFT;
@@ -290,25 +284,16 @@ static int imx6ul_opp_check_speed_grading(struct device *dev)
if (ret)
return ret;
} else {
- struct device_node *np;
- void __iomem *base;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp");
- if (!np)
- np = of_find_compatible_node(NULL, NULL,
- "fsl,imx6ull-ocotp");
- if (!np)
- return -ENOENT;
+ struct regmap *ocotp;
- base = of_iomap(np, 0);
- of_node_put(np);
- if (!base) {
- dev_err(dev, "failed to map ocotp\n");
- return -EFAULT;
- }
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ul-ocotp");
+ if (IS_ERR(ocotp))
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ull-ocotp");
+
+ if (IS_ERR(ocotp))
+ return -ENOENT;
- val = readl_relaxed(base + OCOTP_CFG3);
- iounmap(base);
+ regmap_read(ocotp, OCOTP_CFG3, &val);
}
/*
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ca94e60e705a..dbbf299f4219 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -25,6 +25,7 @@
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <linux/pm_qos.h>
+#include <linux/bitfield.h>
#include <trace/events/power.h>
#include <asm/cpu.h>
@@ -201,8 +202,6 @@ struct global_params {
* @prev_aperf: Last APERF value read from APERF MSR
* @prev_mperf: Last MPERF value read from MPERF MSR
* @prev_tsc: Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- * current sample
* @sample: Storage for storing last Sample data
* @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios
* @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios
@@ -241,7 +240,6 @@ struct cpudata {
u64 prev_aperf;
u64 prev_mperf;
u64 prev_tsc;
- u64 prev_cummulative_iowait;
struct sample sample;
int32_t min_perf_ratio;
int32_t max_perf_ratio;
@@ -2987,6 +2985,9 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
if (min_pstate < cpu->min_perf_ratio)
min_pstate = cpu->min_perf_ratio;
+ if (min_pstate > cpu->max_perf_ratio)
+ min_pstate = cpu->max_perf_ratio;
+
max_pstate = min(cap_pstate, cpu->max_perf_ratio);
if (max_pstate < min_pstate)
max_pstate = min_pstate;
@@ -3404,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void)
return !!(value & 0x1);
}
-static const struct x86_cpu_id intel_epp_balance_perf[] = {
+#define POWERSAVE_MASK GENMASK(7, 0)
+#define BALANCE_POWER_MASK GENMASK(15, 8)
+#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16)
+#define PERFORMANCE_MASK GENMASK(31, 24)
+
+#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \
+ (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\
+ FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\
+ FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\
+ FIELD_PREP_CONST(PERFORMANCE_MASK, performance))
+
+#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \
+ (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\
+ balance_perf, HWP_EPP_PERFORMANCE))
+
+static const struct x86_cpu_id intel_epp_default[] = {
/*
* Set EPP value as 102, this is the max suggested EPP
* which can result in one core turbo frequency for
* AlderLake Mobile CPUs.
*/
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+ HWP_EPP_BALANCE_POWERSAVE, 115, 16)),
{}
};
@@ -3509,11 +3527,24 @@ hwp_cpu_matched:
intel_pstate_sysfs_expose_params();
if (hwp_active) {
- const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
+ const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default);
const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor);
- if (id)
- epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
+ if (id) {
+ epp_values[EPP_INDEX_POWERSAVE] =
+ FIELD_GET(POWERSAVE_MASK, id->driver_data);
+ epp_values[EPP_INDEX_BALANCE_POWERSAVE] =
+ FIELD_GET(BALANCE_POWER_MASK, id->driver_data);
+ epp_values[EPP_INDEX_BALANCE_PERFORMANCE] =
+ FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data);
+ epp_values[EPP_INDEX_PERFORMANCE] =
+ FIELD_GET(PERFORMANCE_MASK, id->driver_data);
+ pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n",
+ epp_values[EPP_INDEX_POWERSAVE],
+ epp_values[EPP_INDEX_BALANCE_POWERSAVE],
+ epp_values[EPP_INDEX_BALANCE_PERFORMANCE],
+ epp_values[EPP_INDEX_PERFORMANCE]);
+ }
if (hybrid_id) {
hybrid_scaling_factor = hybrid_id->driver_data;
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index d46afb3c0092..8d097dcddda4 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -13,6 +13,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
#include <linux/slab.h>
#define LUT_MAX_ENTRIES 32U
@@ -300,7 +301,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = {
static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
{
const void *data;
- int ret;
+ int ret, cpu;
+ struct device *cpu_dev;
+ struct regulator *cpu_reg;
+
+ /* Make sure that all CPU supplies are available before proceeding. */
+ for_each_possible_cpu(cpu) {
+ cpu_dev = get_cpu_device(cpu);
+ if (!cpu_dev)
+ return dev_err_probe(&pdev->dev, -EPROBE_DEFER,
+ "Failed to get cpu%d device\n", cpu);
+
+ cpu_reg = devm_regulator_get(cpu_dev, "cpu");
+ if (IS_ERR(cpu_reg))
+ return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg),
+ "CPU%d regulator get failed\n", cpu);
+ }
+
data = of_device_get_match_data(&pdev->dev);
if (!data)
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 4ee23f4ebf4a..0b483bd0d3ca 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -144,6 +144,29 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
return 0;
}
+static int
+scmi_get_rate_limit(u32 domain, bool has_fast_switch)
+{
+ int ret, rate_limit;
+
+ if (has_fast_switch) {
+ /*
+ * Fast channels are used whenever available,
+ * so use their rate_limit value if populated.
+ */
+ ret = perf_ops->fast_switch_rate_limit(ph, domain,
+ &rate_limit);
+ if (!ret && rate_limit)
+ return rate_limit;
+ }
+
+ ret = perf_ops->rate_limit_get(ph, domain, &rate_limit);
+ if (ret)
+ return 0;
+
+ return rate_limit;
+}
+
static int scmi_cpufreq_init(struct cpufreq_policy *policy)
{
int ret, nr_opp, domain;
@@ -250,6 +273,9 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
policy->fast_switch_possible =
perf_ops->fast_switch_possible(ph, domain);
+ policy->transition_delay_us =
+ scmi_get_rate_limit(domain, policy->fast_switch_possible);
+
return 0;
out_free_opp:
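scmi_get_rate_limit() prefers the fast-channel rate limit whenever fast switching is possible and falls back to the per-domain attribute, treating any error as "no limit". A minimal standalone sketch of that selection order; the stub getters and the 1000/2000 microsecond values are illustrative stand-ins for the SCMI perf_ops:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative stubs; the real values come from the SCMI perf protocol. */
static int fast_switch_rate_limit_get(unsigned int domain, int *us)
{
        *us = 1000;             /* pretend the fast channel advertises 1000 us */
        return 0;
}

static int domain_rate_limit_get(unsigned int domain, int *us)
{
        *us = 2000;             /* pretend the domain attributes say 2000 us */
        return 0;
}

static int get_rate_limit(unsigned int domain, bool has_fast_switch)
{
        int ret, rate_limit;

        if (has_fast_switch) {
                /* Fast channels are used whenever available, so prefer their limit. */
                ret = fast_switch_rate_limit_get(domain, &rate_limit);
                if (!ret && rate_limit)
                        return rate_limit;
        }

        ret = domain_rate_limit_get(domain, &rate_limit);
        if (ret)
                return 0;       /* no usable limit: report no delay */

        return rate_limit;
}

int main(void)
{
        printf("fast switching: %d us\n", get_rate_limit(0, true));
        printf("slow path:      %d us\n", get_rate_limit(0, false));
        return 0;
}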
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index d9cda7f6ccb9..cf5873cc45dc 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -16,6 +16,7 @@
#include <linux/cpumask.h>
#include <linux/tick.h>
#include <linux/cpu.h>
+#include <linux/math64.h>
#include "cpuidle.h"
@@ -187,7 +188,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC);
if (s->exit_latency > 0)
- s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
+ s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC);
else if (s->exit_latency_ns < 0)
s->exit_latency_ns = 0;
else
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 1dff3a52917d..663b7f164d20 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -98,10 +98,15 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns)
unsigned int shrink = guest_halt_poll_shrink;
val = dev->poll_limit_ns;
- if (shrink == 0)
+ if (shrink == 0) {
val = 0;
- else
+ } else {
val /= shrink;
+ /* Reset value to 0 if shrunk below grow_start */
+ if (val < guest_halt_poll_grow_start)
+ val = 0;
+ }
+
trace_guest_halt_poll_ns_shrink(val, dev->poll_limit_ns);
dev->poll_limit_ns = val;
}
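The shrink path now clamps the poll limit to zero once dividing by guest_halt_poll_shrink would leave it below guest_halt_poll_grow_start, so the next grow cycle restarts from grow_start instead of creeping up from a tiny remainder. A standalone sketch of just that adjustment, with the module parameters replaced by plain variables and example values:

#include <stdio.h>

static unsigned int guest_halt_poll_shrink = 2;
static unsigned int guest_halt_poll_grow_start = 50000;         /* ns */

static unsigned long long shrink_poll_limit(unsigned long long poll_limit_ns)
{
        unsigned int shrink = guest_halt_poll_shrink;
        unsigned long long val = poll_limit_ns;

        if (shrink == 0) {
                val = 0;
        } else {
                val /= shrink;
                /* Reset value to 0 if shrunk below grow_start. */
                if (val < guest_halt_poll_grow_start)
                        val = 0;
        }
        return val;
}

int main(void)
{
        printf("%llu -> %llu\n", 200000ULL, shrink_poll_limit(200000));  /* 100000 */
        printf("%llu -> %llu\n", 80000ULL, shrink_poll_limit(80000));    /* 0, not 40000 */
        return 0;
}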
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 3ea64b22cf0d..1d38ecfafc59 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -1617,7 +1617,7 @@ static void
scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
u8 describe_id, u32 message_id, u32 valid_size,
u32 domain, void __iomem **p_addr,
- struct scmi_fc_db_info **p_db)
+ struct scmi_fc_db_info **p_db, u32 *rate_limit)
{
int ret;
u32 flags;
@@ -1661,6 +1661,9 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
goto err_xfer;
}
+ if (rate_limit)
+ *rate_limit = le32_to_cpu(resp->rate_limit) & GENMASK(19, 0);
+
phys_addr = le32_to_cpu(resp->chan_addr_low);
phys_addr |= (u64)le32_to_cpu(resp->chan_addr_high) << 32;
addr = devm_ioremap(ph->dev, phys_addr, size);
diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 211e8e0aef2c..fbcbd703198a 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -153,6 +153,7 @@ struct perf_dom_info {
bool perf_fastchannels;
bool level_indexing_mode;
u32 opp_count;
+ u32 rate_limit_us;
u32 sustained_freq_khz;
u32 sustained_perf_level;
unsigned long mult_factor;
@@ -266,6 +267,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph,
if (PROTOCOL_REV_MAJOR(version) >= 0x4)
dom_info->level_indexing_mode =
SUPPORTS_LEVEL_INDEXING(flags);
+ dom_info->rate_limit_us = le32_to_cpu(attr->rate_limit_us) &
+ GENMASK(19, 0);
dom_info->sustained_freq_khz =
le32_to_cpu(attr->sustained_freq_khz);
dom_info->sustained_perf_level =
@@ -786,23 +789,27 @@ static void scmi_perf_domain_init_fc(const struct scmi_protocol_handle *ph,
ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL,
PERF_LEVEL_GET, 4, dom->id,
- &fc[PERF_FC_LEVEL].get_addr, NULL);
+ &fc[PERF_FC_LEVEL].get_addr, NULL,
+ &fc[PERF_FC_LEVEL].rate_limit);
ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL,
PERF_LIMITS_GET, 8, dom->id,
- &fc[PERF_FC_LIMIT].get_addr, NULL);
+ &fc[PERF_FC_LIMIT].get_addr, NULL,
+ &fc[PERF_FC_LIMIT].rate_limit);
if (dom->info.set_perf)
ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL,
PERF_LEVEL_SET, 4, dom->id,
&fc[PERF_FC_LEVEL].set_addr,
- &fc[PERF_FC_LEVEL].set_db);
+ &fc[PERF_FC_LEVEL].set_db,
+ &fc[PERF_FC_LEVEL].rate_limit);
if (dom->set_limits)
ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL,
PERF_LIMITS_SET, 8, dom->id,
&fc[PERF_FC_LIMIT].set_addr,
- &fc[PERF_FC_LIMIT].set_db);
+ &fc[PERF_FC_LIMIT].set_db,
+ &fc[PERF_FC_LIMIT].rate_limit);
dom->fc_info = fc;
}
@@ -855,6 +862,23 @@ scmi_dvfs_transition_latency_get(const struct scmi_protocol_handle *ph,
return dom->opp[dom->opp_count - 1].trans_latency_us * 1000;
}
+static int
+scmi_dvfs_rate_limit_get(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit)
+{
+ struct perf_dom_info *dom;
+
+ if (!rate_limit)
+ return -EINVAL;
+
+ dom = scmi_perf_domain_lookup(ph, domain);
+ if (IS_ERR(dom))
+ return PTR_ERR(dom);
+
+ *rate_limit = dom->rate_limit_us;
+ return 0;
+}
+
static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain,
unsigned long freq, bool poll)
{
@@ -954,6 +978,25 @@ static bool scmi_fast_switch_possible(const struct scmi_protocol_handle *ph,
return dom->fc_info && dom->fc_info[PERF_FC_LEVEL].set_addr;
}
+static int scmi_fast_switch_rate_limit(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit)
+{
+ struct perf_dom_info *dom;
+
+ if (!rate_limit)
+ return -EINVAL;
+
+ dom = scmi_perf_domain_lookup(ph, domain);
+ if (IS_ERR(dom))
+ return PTR_ERR(dom);
+
+ if (!dom->fc_info)
+ return -EINVAL;
+
+ *rate_limit = dom->fc_info[PERF_FC_LEVEL].rate_limit;
+ return 0;
+}
+
static enum scmi_power_scale
scmi_power_scale_get(const struct scmi_protocol_handle *ph)
{
@@ -970,11 +1013,13 @@ static const struct scmi_perf_proto_ops perf_proto_ops = {
.level_set = scmi_perf_level_set,
.level_get = scmi_perf_level_get,
.transition_latency_get = scmi_dvfs_transition_latency_get,
+ .rate_limit_get = scmi_dvfs_rate_limit_get,
.device_opps_add = scmi_dvfs_device_opps_add,
.freq_set = scmi_dvfs_freq_set,
.freq_get = scmi_dvfs_freq_get,
.est_power_get = scmi_dvfs_est_power_get,
.fast_switch_possible = scmi_fast_switch_possible,
+ .fast_switch_rate_limit = scmi_fast_switch_rate_limit,
.power_scale_get = scmi_power_scale_get,
};
diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c
index a4c6cd4716fe..604184c044ff 100644
--- a/drivers/firmware/arm_scmi/powercap.c
+++ b/drivers/firmware/arm_scmi/powercap.c
@@ -703,20 +703,24 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_CAP_SET, 4, domain,
&fc[POWERCAP_FC_CAP].set_addr,
- &fc[POWERCAP_FC_CAP].set_db);
+ &fc[POWERCAP_FC_CAP].set_db,
+ &fc[POWERCAP_FC_CAP].rate_limit);
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_CAP_GET, 4, domain,
- &fc[POWERCAP_FC_CAP].get_addr, NULL);
+ &fc[POWERCAP_FC_CAP].get_addr, NULL,
+ &fc[POWERCAP_FC_CAP].rate_limit);
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_SET, 4, domain,
&fc[POWERCAP_FC_PAI].set_addr,
- &fc[POWERCAP_FC_PAI].set_db);
+ &fc[POWERCAP_FC_PAI].set_db,
+ &fc[POWERCAP_FC_PAI].rate_limit);
ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
POWERCAP_PAI_GET, 4, domain,
- &fc[POWERCAP_FC_PAI].get_addr, NULL);
+ &fc[POWERCAP_FC_PAI].get_addr, NULL,
+ &fc[POWERCAP_FC_PAI].rate_limit);
*p_fc = fc;
}
diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h
index e683c26f24eb..8b5d9ce4a33a 100644
--- a/drivers/firmware/arm_scmi/protocols.h
+++ b/drivers/firmware/arm_scmi/protocols.h
@@ -234,6 +234,7 @@ struct scmi_fc_info {
void __iomem *set_addr;
void __iomem *get_addr;
struct scmi_fc_db_info *set_db;
+ u32 rate_limit;
};
/**
@@ -268,7 +269,8 @@ struct scmi_proto_helpers_ops {
u8 describe_id, u32 message_id,
u32 valid_size, u32 domain,
void __iomem **p_addr,
- struct scmi_fc_db_info **p_db);
+ struct scmi_fc_db_info **p_db,
+ u32 *rate_limit);
void (*fastchannel_db_ring)(struct scmi_fc_db_info *db);
};
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 860b51b56a92..d4e844128826 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -246,7 +246,10 @@ static intel_wakeref_t __intel_runtime_pm_get_if_active(struct intel_runtime_pm
* function, since the power state is undefined. This applies
* atm to the late/early system suspend/resume handlers.
*/
- if (pm_runtime_get_if_active(rpm->kdev, ignore_usecount) <= 0)
+ if ((ignore_usecount &&
+ pm_runtime_get_if_active(rpm->kdev) <= 0) ||
+ (!ignore_usecount &&
+ pm_runtime_get_if_in_use(rpm->kdev) <= 0))
return 0;
}
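With the boolean argument gone, callers choose between the two helpers explicitly, as the i915 hunk above does. A toy model of the two conditional-get semantics; the struct and the 1/0 return convention are simplified assumptions, not the kernel implementation:

#include <stdio.h>

enum rpm_status { RPM_ACTIVE, RPM_RESUMING, RPM_SUSPENDED, RPM_SUSPENDING };

struct toy_dev {
        enum rpm_status runtime_status;
        int usage_count;
};

/* Take a reference only if the device is RPM_ACTIVE (usage count ignored). */
static int get_if_active(struct toy_dev *d)
{
        if (d->runtime_status != RPM_ACTIVE)
                return 0;
        d->usage_count++;
        return 1;
}

/* Take a reference only if RPM_ACTIVE *and* the device is already in use. */
static int get_if_in_use(struct toy_dev *d)
{
        if (d->runtime_status != RPM_ACTIVE || d->usage_count == 0)
                return 0;
        d->usage_count++;
        return 1;
}

int main(void)
{
        struct toy_dev d = { .runtime_status = RPM_ACTIVE, .usage_count = 0 };

        printf("if_in_use: %d\n", get_if_in_use(&d));   /* 0: active but unused */
        printf("if_active: %d\n", get_if_active(&d));   /* 1: takes the first ref */
        printf("if_in_use: %d\n", get_if_in_use(&d));   /* 1: now in use as well */
        return 0;
}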
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index b429c2876a76..dd110058bf74 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -330,7 +330,7 @@ int xe_pm_runtime_put(struct xe_device *xe)
int xe_pm_runtime_get_if_active(struct xe_device *xe)
{
- return pm_runtime_get_if_active(xe->drm.dev, true);
+ return pm_runtime_get_if_active(xe->drm.dev);
}
void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index bcf1198e8991..e486027f8b07 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1934,7 +1934,8 @@ static void __init spr_idle_state_table_update(void)
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
- unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
+ unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) &
+ MWAIT_CSTATE_MASK;
unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
MWAIT_SUBSTATE_MASK;
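The extra masking keeps the computed C-state index inside its 4-bit field even for an all-ones hint, so the substate shift above cannot run past the MSR layout. A standalone sketch of the before/after arithmetic; the MWAIT_* macro values are assumed to match arch/x86/include/asm/mwait.h:

#include <stdio.h>

#define MWAIT_SUBSTATE_MASK     0xf
#define MWAIT_CSTATE_MASK       0xf
#define MWAIT_SUBSTATE_SIZE     4
#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)

int main(void)
{
        unsigned int mwait_hint = 0xf1;         /* C-state field of 0xf, substate 1 */
        unsigned int unmasked = MWAIT_HINT2CSTATE(mwait_hint) + 1;
        unsigned int masked = (MWAIT_HINT2CSTATE(mwait_hint) + 1) & MWAIT_CSTATE_MASK;

        /* Without the mask, the +1 can push the index past the 4-bit field. */
        printf("unmasked: %u, masked: %u\n", unmasked, masked);  /* 16 vs 0 */
        return 0;
}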
diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index e21287d50c15..e1ae0f9fad43 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -674,7 +674,7 @@ static int ccs_set_ctrl(struct v4l2_ctrl *ctrl)
break;
}
- pm_status = pm_runtime_get_if_active(&client->dev, true);
+ pm_status = pm_runtime_get_if_active(&client->dev);
if (!pm_status)
return 0;
diff --git a/drivers/media/i2c/ov64a40.c b/drivers/media/i2c/ov64a40.c
index 4fba4c2cb064..541bf74581d2 100644
--- a/drivers/media/i2c/ov64a40.c
+++ b/drivers/media/i2c/ov64a40.c
@@ -3287,7 +3287,7 @@ static int ov64a40_set_ctrl(struct v4l2_ctrl *ctrl)
exp_max, 1, exp_val);
}
- pm_status = pm_runtime_get_if_active(ov64a40->dev, true);
+ pm_status = pm_runtime_get_if_active(ov64a40->dev);
if (!pm_status)
return 0;
diff --git a/drivers/media/i2c/thp7312.c b/drivers/media/i2c/thp7312.c
index 2806887514dc..19bd923a7315 100644
--- a/drivers/media/i2c/thp7312.c
+++ b/drivers/media/i2c/thp7312.c
@@ -1052,7 +1052,7 @@ static int thp7312_s_ctrl(struct v4l2_ctrl *ctrl)
if (ctrl->flags & V4L2_CTRL_FLAG_INACTIVE)
return -EINVAL;
- if (!pm_runtime_get_if_active(thp7312->dev, true))
+ if (!pm_runtime_get_if_active(thp7312->dev))
return 0;
switch (ctrl->id) {
diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c
index 5620dc271fac..cbf3d4761ce3 100644
--- a/drivers/net/ipa/ipa_smp2p.c
+++ b/drivers/net/ipa/ipa_smp2p.c
@@ -92,7 +92,7 @@ static void ipa_smp2p_notify(struct ipa_smp2p *smp2p)
return;
dev = &smp2p->ipa->pdev->dev;
- smp2p->power_on = pm_runtime_get_if_active(dev, true) > 0;
+ smp2p->power_on = pm_runtime_get_if_active(dev) > 0;
/* Signal whether the IPA power is enabled */
mask = BIT(smp2p->enabled_bit);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index c3585229c12a..10a5f7d61367 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2532,7 +2532,7 @@ static void pci_pme_list_scan(struct work_struct *work)
* course of the call.
*/
if (bdev) {
- bref = pm_runtime_get_if_active(bdev, true);
+ bref = pm_runtime_get_if_active(bdev);
if (!bref)
continue;
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index ce920f17f45f..f390665743c4 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -522,7 +522,7 @@ static int dtpm_for_each_child(const struct dtpm_node *hierarchy,
/**
* dtpm_create_hierarchy - Create the dtpm hierarchy
- * @hierarchy: An array of struct dtpm_node describing the hierarchy
+ * @dtpm_match_table: Pointer to the array of device ID structures
*
* The function is called by the platform specific code with the
 * description of the different nodes in the hierarchy. It creates the
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 9193c3b8edeb..ae7ee611978b 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -219,7 +219,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
ret = freq_qos_add_request(&policy->constraints,
&dtpm_cpu->qos_req, FREQ_QOS_MAX,
pd->table[pd->nr_perf_states - 1].frequency);
- if (ret)
+ if (ret < 0)
goto out_dtpm_unregister;
cpufreq_cpu_put(policy);
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 2feed036c1cd..aa627a6b12a4 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -5,6 +5,7 @@
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/cleanup.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list.h>
@@ -759,6 +760,11 @@ static int rapl_config(struct rapl_package *rp)
default:
return -EINVAL;
}
+
+ /* defaults_msr can be NULL on unsupported platforms */
+ if (!rp->priv->defaults || !rp->priv->rpi)
+ return -ENODEV;
+
return 0;
}
@@ -1256,6 +1262,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
@@ -1499,7 +1507,7 @@ static int rapl_detect_domains(struct rapl_package *rp)
}
/* called from CPU hotplug notifier, hotplug lock held */
-void rapl_remove_package(struct rapl_package *rp)
+void rapl_remove_package_cpuslocked(struct rapl_package *rp)
{
struct rapl_domain *rd, *rd_package = NULL;
@@ -1528,10 +1536,18 @@ void rapl_remove_package(struct rapl_package *rp)
list_del(&rp->plist);
kfree(rp);
}
+EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
+
+void rapl_remove_package(struct rapl_package *rp)
+{
+ guard(cpus_read_lock)();
+ rapl_remove_package_cpuslocked(rp);
+}
EXPORT_SYMBOL_GPL(rapl_remove_package);
/* caller to ensure CPU hotplug lock is held */
-struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu)
{
struct rapl_package *rp;
int uid;
@@ -1549,10 +1565,17 @@ struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv,
return NULL;
}
+EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
+
+struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+{
+ guard(cpus_read_lock)();
+ return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);
+}
EXPORT_SYMBOL_GPL(rapl_find_package_domain);
/* called from CPU hotplug notifier, hotplug lock held */
-struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
struct rapl_package *rp;
int ret;
@@ -1598,6 +1621,13 @@ err_free_package:
kfree(rp);
return ERR_PTR(ret);
}
+EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
+
+struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
+{
+ guard(cpus_read_lock)();
+ return rapl_add_package_cpuslocked(id, priv, id_is_cpu);
+}
EXPORT_SYMBOL_GPL(rapl_add_package);
static void power_limit_state_save(void)
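The new *_cpuslocked() variants follow the usual kernel layering: the locked flavour assumes the caller already holds the CPU hotplug lock, while the plain wrapper takes the lock itself via guard(cpus_read_lock)(). A loose userspace analogue of that layering, with a pthread mutex standing in for the hotplug lock; the names and the counter are illustrative only:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static int package_count;

/* Caller must hold hotplug_lock (the "_cpuslocked" flavour). */
static int add_package_locked(void)
{
        return ++package_count;
}

/* Convenience wrapper that takes and drops the lock around the locked variant. */
static int add_package(void)
{
        int ret;

        pthread_mutex_lock(&hotplug_lock);
        ret = add_package_locked();
        pthread_mutex_unlock(&hotplug_lock);
        return ret;
}

int main(void)
{
        /* A hotplug callback, already under the lock, calls the locked variant... */
        pthread_mutex_lock(&hotplug_lock);
        add_package_locked();
        pthread_mutex_unlock(&hotplug_lock);

        /* ...while other callers use the self-locking wrapper. */
        printf("packages: %d\n", add_package());
        return 0;
}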
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 250bd41a588c..b4b6930cacb0 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -73,9 +73,9 @@ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_package *rp;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, rapl_msr_priv, true);
+ rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -88,14 +88,14 @@ static int rapl_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true);
if (!rp)
return 0;
cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
if (lead_cpu >= nr_cpu_ids)
- rapl_remove_package(rp);
+ rapl_remove_package_cpuslocked(rp);
else if (rp->lead_cpu == cpu)
rp->lead_cpu = lead_cpu;
return 0;
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 891c90fefd8b..f6b7f085977c 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -40,6 +40,7 @@ enum tpmi_rapl_register {
TPMI_RAPL_REG_ENERGY_STATUS,
TPMI_RAPL_REG_PERF_STATUS,
TPMI_RAPL_REG_POWER_INFO,
+ TPMI_RAPL_REG_DOMAIN_INFO,
TPMI_RAPL_REG_INTERRUPT,
TPMI_RAPL_REG_MAX = 15,
};
@@ -130,6 +131,12 @@ static void trp_release(struct tpmi_rapl_package *trp)
mutex_unlock(&tpmi_rapl_lock);
}
+/*
+ * Bit 0 of TPMI_RAPL_REG_DOMAIN_INFO indicates if the current package is a domain
+ * root or not. Only domain root packages can enumerate System (Psys) Domain.
+ */
+#define TPMI_RAPL_DOMAIN_ROOT BIT(0)
+
static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
{
u8 tpmi_domain_version;
@@ -139,6 +146,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
enum rapl_domain_reg_id reg_id;
int tpmi_domain_size, tpmi_domain_flags;
u64 tpmi_domain_header = readq(trp->base + offset);
+ u64 tpmi_domain_info;
/* Domain Parent bits are ignored for now */
tpmi_domain_version = tpmi_domain_header & 0xff;
@@ -169,6 +177,13 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
domain_type = RAPL_DOMAIN_PACKAGE;
break;
case TPMI_RAPL_DOMAIN_SYSTEM:
+ if (!(tpmi_domain_flags & BIT(TPMI_RAPL_REG_DOMAIN_INFO))) {
+ pr_warn(FW_BUG "System domain must support Domain Info register\n");
+ return -ENODEV;
+ }
+ tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO * 8);
+ if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT))
+ return 0;
domain_type = RAPL_DOMAIN_PLATFORM;
break;
case TPMI_RAPL_DOMAIN_MEMORY:
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index 2f00fc3bf274..e964a9375722 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu)
if (topology_physical_package_id(cpu))
return 0;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, &rapl_mmio_priv, true);
+ rp = rapl_add_package_cpuslocked(cpu, &rapl_mmio_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -42,14 +42,14 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true);
if (!rp)
return 0;
cpumask_clear_cpu(cpu, &rp->cpumask);
lead_cpu = cpumask_first(&rp->cpumask);
if (lead_cpu >= nr_cpu_ids)
- rapl_remove_package(rp);
+ rapl_remove_package_cpuslocked(rp);
else if (rp->lead_cpu == cpu)
rp->lead_cpu = lead_cpu;
return 0;
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 3a0995f8bce8..930b6afba6f4 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -139,6 +139,7 @@ struct cppc_cpudata {
#ifdef CONFIG_ACPI_CPPC_LIB
extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
+extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf);
extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
extern int cppc_set_enable(int cpu, bool enable);
@@ -167,6 +168,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
{
return -ENOTSUPP;
}
+static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf)
+{
+ return -ENOTSUPP;
+}
static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
{
return -ENOTSUPP;
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 6ad02ad9c7b4..d21838835abd 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -39,11 +39,16 @@ struct amd_aperf_mperf {
* @cppc_req_cached: cached performance request hints
* @highest_perf: the maximum performance an individual processor may reach,
* assuming ideal conditions
+ * For platforms that do not support the preferred core feature, the
+ * highest_perf may be configured as 166 or 255; to avoid the max frequency
+ * being calculated wrongly, we take this fixed value as the highest_perf.
* @nominal_perf: the maximum sustained performance level of the processor,
* assuming ideal operating conditions
* @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
* savings are achieved
* @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking, where a higher value indicates
+ * a higher priority.
* @max_freq: the frequency that mapped to highest_perf
* @min_freq: the frequency that mapped to lowest_perf
* @nominal_freq: the frequency that mapped to nominal_perf
@@ -52,6 +57,9 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @hw_prefcore: check whether the HW supports the preferred core feature.
+ * The AMD P-State driver supports the preferred core feature only when
+ * both hw_prefcore and the early prefcore param are true.
* @epp_policy: Last saved policy used to set energy-performance preference
* @epp_cached: Cached CPPC energy-performance preference value
* @policy: Cpufreq policy value
@@ -70,6 +78,7 @@ struct amd_cpudata {
u32 nominal_perf;
u32 lowest_nonlinear_perf;
u32 lowest_perf;
+ u32 prefcore_ranking;
u32 min_limit_perf;
u32 max_limit_perf;
u32 min_limit_freq;
@@ -85,6 +94,7 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+ bool hw_prefcore;
/* EPP feature related attributes*/
s16 epp_policy;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afda5f24d3dd..692ea6e55129 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -263,6 +263,7 @@ static inline bool cpufreq_supports_freq_invariance(void)
return false;
}
static inline void disable_cpufreq(void) { }
+static inline void cpufreq_update_limits(unsigned int cpu) { }
#endif
#ifdef CONFIG_CPU_FREQ_STAT
@@ -568,9 +569,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div,
/*
* The polling frequency depends on the capability of the processor. Default
- * polling frequency is 1000 times the transition latency of the processor. The
- * ondemand governor will work on any processor with transition latency <= 10ms,
- * using appropriate sampling rate.
+ * polling frequency is 1000 times the transition latency of the processor.
*/
#define LATENCY_MULTIPLIER (1000)
@@ -1021,6 +1020,18 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
efficiencies);
}
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx)
+{
+ unsigned int freq;
+
+ if (idx < 0)
+ return false;
+
+ freq = policy->freq_table[idx].frequency;
+
+ return freq == clamp_val(freq, policy->min, policy->max);
+}
+
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation)
@@ -1054,7 +1065,8 @@ retry:
return 0;
}
- if (idx < 0 && efficiencies) {
+ /* Limit frequency index to honor policy->min/max */
+ if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
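cpufreq_is_in_limits() simply checks that the frequency picked from the table still lies within policy->min..policy->max, which is what makes retrying the lookup without the efficiencies filter meaningful. A minimal standalone version of that check against a toy frequency table; the policy struct and values here are illustrative, not the cpufreq API:

#include <stdio.h>
#include <stdbool.h>

#define CLAMP(v, lo, hi)        ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))

struct toy_policy {
        unsigned int min, max;                  /* kHz */
        const unsigned int *freq_table;
        int len;
};

static bool freq_is_in_limits(const struct toy_policy *p, int idx)
{
        unsigned int freq;

        if (idx < 0 || idx >= p->len)
                return false;

        freq = p->freq_table[idx];

        /* The frequency is within limits iff clamping it is a no-op. */
        return freq == CLAMP(freq, p->min, p->max);
}

int main(void)
{
        static const unsigned int table[] = { 400000, 800000, 1200000, 1800000 };
        struct toy_policy p = { .min = 800000, .max = 1200000,
                                .freq_table = table, .len = 4 };

        printf("idx 0 in limits: %d\n", freq_is_in_limits(&p, 0));      /* 0 */
        printf("idx 2 in limits: %d\n", freq_is_in_limits(&p, 2));      /* 1 */
        return 0;
}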
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 33f21bd85dbf..f3196f82fd8a 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -178,6 +178,12 @@ struct rapl_package {
struct rapl_if_priv *priv;
};
+struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu);
+struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv,
+ bool id_is_cpu);
+void rapl_remove_package_cpuslocked(struct rapl_package *rp);
+
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index a2f3e53a8196..97b0e23363c8 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -662,8 +662,8 @@ struct pm_subsys_data {
struct dev_pm_info {
pm_message_t power_state;
- unsigned int can_wakeup:1;
- unsigned int async_suspend:1;
+ bool can_wakeup:1;
+ bool async_suspend:1;
bool in_dpm_list:1; /* Owned by the PM core */
bool is_prepared:1; /* Owned by the PM core */
bool is_suspended:1; /* Ditto */
@@ -682,10 +682,10 @@ struct dev_pm_info {
bool syscore:1;
bool no_pm_callbacks:1; /* Owned by the PM core */
bool async_in_progress:1; /* Owned by the PM core */
- unsigned int must_resume:1; /* Owned by the PM core */
- unsigned int may_skip_resume:1; /* Set by subsystems */
+ bool must_resume:1; /* Owned by the PM core */
+ bool may_skip_resume:1; /* Set by subsystems */
#else
- unsigned int should_wakeup:1;
+ bool should_wakeup:1;
#endif
#ifdef CONFIG_PM
struct hrtimer suspend_timer;
@@ -696,17 +696,17 @@ struct dev_pm_info {
atomic_t usage_count;
atomic_t child_count;
unsigned int disable_depth:3;
- unsigned int idle_notification:1;
- unsigned int request_pending:1;
- unsigned int deferred_resume:1;
- unsigned int needs_force_resume:1;
- unsigned int runtime_auto:1;
+ bool idle_notification:1;
+ bool request_pending:1;
+ bool deferred_resume:1;
+ bool needs_force_resume:1;
+ bool runtime_auto:1;
bool ignore_children:1;
- unsigned int no_callbacks:1;
- unsigned int irq_safe:1;
- unsigned int use_autosuspend:1;
- unsigned int timer_autosuspends:1;
- unsigned int memalloc_noio:1;
+ bool no_callbacks:1;
+ bool irq_safe:1;
+ bool use_autosuspend:1;
+ bool timer_autosuspends:1;
+ bool memalloc_noio:1;
unsigned int links_count;
enum rpm_request request;
enum rpm_status runtime_status;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 7c9b35448563..d39dc863f612 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -72,7 +72,8 @@ extern int pm_runtime_force_resume(struct device *dev);
extern int __pm_runtime_idle(struct device *dev, int rpmflags);
extern int __pm_runtime_suspend(struct device *dev, int rpmflags);
extern int __pm_runtime_resume(struct device *dev, int rpmflags);
-extern int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count);
+extern int pm_runtime_get_if_active(struct device *dev);
+extern int pm_runtime_get_if_in_use(struct device *dev);
extern int pm_schedule_suspend(struct device *dev, unsigned int delay);
extern int __pm_runtime_set_status(struct device *dev, unsigned int status);
extern int pm_runtime_barrier(struct device *dev);
@@ -95,18 +96,6 @@ extern void pm_runtime_release_supplier(struct device_link *link);
extern int devm_pm_runtime_enable(struct device *dev);
/**
- * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
- * @dev: Target device.
- *
- * Increment the runtime PM usage counter of @dev if its runtime PM status is
- * %RPM_ACTIVE and its runtime PM usage counter is greater than 0.
- */
-static inline int pm_runtime_get_if_in_use(struct device *dev)
-{
- return pm_runtime_get_if_active(dev, false);
-}
-
-/**
* pm_suspend_ignore_children - Set runtime PM behavior regarding children.
* @dev: Target device.
* @enable: Whether or not to ignore possible dependencies on children.
@@ -275,8 +264,7 @@ static inline int pm_runtime_get_if_in_use(struct device *dev)
{
return -EINVAL;
}
-static inline int pm_runtime_get_if_active(struct device *dev,
- bool ign_usage_count)
+static inline int pm_runtime_get_if_active(struct device *dev)
{
return -EINVAL;
}
@@ -461,6 +449,18 @@ static inline int pm_runtime_put(struct device *dev)
}
/**
+ * __pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
+ * @dev: Target device.
+ *
+ * Decrement the runtime PM usage counter of @dev and if it turns out to be
+ * equal to 0, queue up a work item for @dev like in pm_request_autosuspend().
+ */
+static inline int __pm_runtime_put_autosuspend(struct device *dev)
+{
+ return __pm_runtime_suspend(dev, RPM_GET_PUT | RPM_ASYNC | RPM_AUTO);
+}
+
+/**
* pm_runtime_put_autosuspend - Drop device usage counter and queue autosuspend if 0.
* @dev: Target device.
*
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index f2f05fb42d28..fafedb3b6604 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -128,6 +128,8 @@ struct scmi_perf_domain_info {
* @level_set: sets the performance level of a domain
* @level_get: gets the performance level of a domain
* @transition_latency_get: gets the DVFS transition latency for a given device
+ * @rate_limit_get: gets the minimum time (us) required between successive
+ * requests
* @device_opps_add: adds all the OPPs for a given device
* @freq_set: sets the frequency for a given device using sustained frequency
* to sustained performance level mapping
@@ -137,6 +139,8 @@ struct scmi_perf_domain_info {
* at a given frequency
* @fast_switch_possible: indicates if fast DVFS switching is possible or not
* for a given device
+ * @fast_switch_rate_limit: gets the minimum time (us) required between
+ * successive fast_switching requests
* @power_scale_mw_get: indicates if the power values provided are in milliWatts
* or in some other (abstract) scale
*/
@@ -154,6 +158,8 @@ struct scmi_perf_proto_ops {
u32 *level, bool poll);
int (*transition_latency_get)(const struct scmi_protocol_handle *ph,
u32 domain);
+ int (*rate_limit_get)(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit);
int (*device_opps_add)(const struct scmi_protocol_handle *ph,
struct device *dev, u32 domain);
int (*freq_set)(const struct scmi_protocol_handle *ph, u32 domain,
@@ -164,6 +170,8 @@ struct scmi_perf_proto_ops {
unsigned long *rate, unsigned long *power);
bool (*fast_switch_possible)(const struct scmi_protocol_handle *ph,
u32 domain);
+ int (*fast_switch_rate_limit)(const struct scmi_protocol_handle *ph,
+ u32 domain, u32 *rate_limit);
enum scmi_power_scale (*power_scale_get)(const struct scmi_protocol_handle *ph);
};
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index ef503088942d..da6ebca3ff77 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -40,65 +40,6 @@ typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_MIN PM_SUSPEND_TO_IDLE
#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
-enum suspend_stat_step {
- SUSPEND_FREEZE = 1,
- SUSPEND_PREPARE,
- SUSPEND_SUSPEND,
- SUSPEND_SUSPEND_LATE,
- SUSPEND_SUSPEND_NOIRQ,
- SUSPEND_RESUME_NOIRQ,
- SUSPEND_RESUME_EARLY,
- SUSPEND_RESUME
-};
-
-struct suspend_stats {
- int success;
- int fail;
- int failed_freeze;
- int failed_prepare;
- int failed_suspend;
- int failed_suspend_late;
- int failed_suspend_noirq;
- int failed_resume;
- int failed_resume_early;
- int failed_resume_noirq;
-#define REC_FAILED_NUM 2
- int last_failed_dev;
- char failed_devs[REC_FAILED_NUM][40];
- int last_failed_errno;
- int errno[REC_FAILED_NUM];
- int last_failed_step;
- u64 last_hw_sleep;
- u64 total_hw_sleep;
- u64 max_hw_sleep;
- enum suspend_stat_step failed_steps[REC_FAILED_NUM];
-};
-
-extern struct suspend_stats suspend_stats;
-
-static inline void dpm_save_failed_dev(const char *name)
-{
- strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev],
- name,
- sizeof(suspend_stats.failed_devs[0]));
- suspend_stats.last_failed_dev++;
- suspend_stats.last_failed_dev %= REC_FAILED_NUM;
-}
-
-static inline void dpm_save_failed_errno(int err)
-{
- suspend_stats.errno[suspend_stats.last_failed_errno] = err;
- suspend_stats.last_failed_errno++;
- suspend_stats.last_failed_errno %= REC_FAILED_NUM;
-}
-
-static inline void dpm_save_failed_step(enum suspend_stat_step step)
-{
- suspend_stats.failed_steps[suspend_stats.last_failed_step] = step;
- suspend_stats.last_failed_step++;
- suspend_stats.last_failed_step %= REC_FAILED_NUM;
-}
-
/**
* struct platform_suspend_ops - Callbacks for managing platform dependent
* system sleep states.
@@ -626,4 +567,19 @@ static inline void queue_up_suspend_work(void) {}
#endif /* !CONFIG_PM_AUTOSLEEP */
+enum suspend_stat_step {
+ SUSPEND_WORKING = 0,
+ SUSPEND_FREEZE,
+ SUSPEND_PREPARE,
+ SUSPEND_SUSPEND,
+ SUSPEND_SUSPEND_LATE,
+ SUSPEND_SUSPEND_NOIRQ,
+ SUSPEND_RESUME_NOIRQ,
+ SUSPEND_RESUME_EARLY,
+ SUSPEND_RESUME
+};
+
+void dpm_save_failed_dev(const char *name);
+void dpm_save_failed_step(enum suspend_stat_step step);
+
#endif /* _LINUX_SUSPEND_H */
diff --git a/include/trace/events/rpm.h b/include/trace/events/rpm.h
index 3c716214dab1..bd120e23ce12 100644
--- a/include/trace/events/rpm.h
+++ b/include/trace/events/rpm.h
@@ -101,6 +101,48 @@ TRACE_EVENT(rpm_return_int,
__entry->ret)
);
+#define RPM_STATUS_STRINGS \
+ EM(RPM_INVALID, "RPM_INVALID") \
+ EM(RPM_ACTIVE, "RPM_ACTIVE") \
+ EM(RPM_RESUMING, "RPM_RESUMING") \
+ EM(RPM_SUSPENDED, "RPM_SUSPENDED") \
+ EMe(RPM_SUSPENDING, "RPM_SUSPENDING")
+
+/* Enums require being exported to userspace, for user tool parsing. */
+#undef EM
+#undef EMe
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+RPM_STATUS_STRINGS
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b) { a, b },
+#define EMe(a, b) { a, b }
+
+TRACE_EVENT(rpm_status,
+ TP_PROTO(struct device *dev, enum rpm_status status),
+ TP_ARGS(dev, status),
+
+ TP_STRUCT__entry(
+ __string(name, dev_name(dev))
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, dev_name(dev));
+ __entry->status = status;
+ ),
+
+ TP_printk("%s status=%s", __get_str(name),
+ __print_symbolic(__entry->status, RPM_STATUS_STRINGS))
+);
+
#endif /* _TRACE_RUNTIME_POWER_H */
/* This part must be outside protection */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 4b31629c5be4..afce8130d8b9 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -39,9 +39,9 @@ config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on SWAP && ARCH_HIBERNATION_POSSIBLE
select HIBERNATE_CALLBACKS
- select LZO_COMPRESS
- select LZO_DECOMPRESS
select CRC32
+ select CRYPTO
+ select CRYPTO_LZO
help
Enable the suspend to disk (STD) functionality, which is usually
called "hibernation" in user interfaces. STD checkpoints the
@@ -92,6 +92,28 @@ config HIBERNATION_SNAPSHOT_DEV
If in doubt, say Y.
+choice
+ prompt "Default compressor"
+ default HIBERNATION_COMP_LZO
+ depends on HIBERNATION
+
+config HIBERNATION_COMP_LZO
+ bool "lzo"
+ depends on CRYPTO_LZO
+
+config HIBERNATION_COMP_LZ4
+ bool "lz4"
+ depends on CRYPTO_LZ4
+
+endchoice
+
+config HIBERNATION_DEF_COMP
+ string
+ default "lzo" if HIBERNATION_COMP_LZO
+ default "lz4" if HIBERNATION_COMP_LZ4
+ help
+ Default compressor to be used for hibernation.
+
config PM_STD_PARTITION
string "Default resume partition"
depends on HIBERNATION
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 4b0b7cf2e019..43b1a82e800c 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -47,6 +47,15 @@ dev_t swsusp_resume_device;
sector_t swsusp_resume_block;
__visible int in_suspend __nosavedata;
+static char hibernate_compressor[CRYPTO_MAX_ALG_NAME] = CONFIG_HIBERNATION_DEF_COMP;
+
+/*
+ * Compression/decompression algorithm to be used while saving/loading
+ * the image to/from disk. This is later used in 'kernel/power/swap.c'
+ * to allocate the compressor streams.
+ */
+char hib_comp_algo[CRYPTO_MAX_ALG_NAME];
+
enum {
HIBERNATION_INVALID,
HIBERNATION_PLATFORM,
@@ -718,6 +727,9 @@ static int load_image_and_restore(void)
return error;
}
+#define COMPRESSION_ALGO_LZO "lzo"
+#define COMPRESSION_ALGO_LZ4 "lz4"
+
/**
* hibernate - Carry out system hibernation, including saving the image.
*/
@@ -732,6 +744,17 @@ int hibernate(void)
return -EPERM;
}
+ /*
+ * Query for the compression algorithm support if compression is enabled.
+ */
+ if (!nocompress) {
+ strscpy(hib_comp_algo, hibernate_compressor, sizeof(hib_comp_algo));
+ if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) {
+ pr_err("%s compression is not available\n", hib_comp_algo);
+ return -EOPNOTSUPP;
+ }
+ }
+
sleep_flags = lock_system_sleep();
/* The snapshot device should not be opened while we're running */
if (!hibernate_acquire()) {
@@ -766,11 +789,24 @@ int hibernate(void)
if (hibernation_mode == HIBERNATION_PLATFORM)
flags |= SF_PLATFORM_MODE;
- if (nocompress)
+ if (nocompress) {
flags |= SF_NOCOMPRESS_MODE;
- else
+ } else {
flags |= SF_CRC32_MODE;
+ /*
+ * By default, LZO compression is enabled. Use SF_COMPRESSION_ALG_LZ4
+ * to override this behaviour and use LZ4.
+ *
+ * Refer to kernel/power/power.h for more details.
+ */
+
+ if (!strcmp(hib_comp_algo, COMPRESSION_ALGO_LZ4))
+ flags |= SF_COMPRESSION_ALG_LZ4;
+ else
+ flags |= SF_COMPRESSION_ALG_LZO;
+ }
+
pm_pr_dbg("Writing hibernation image.\n");
error = swsusp_write(flags);
swsusp_free();
@@ -955,6 +991,22 @@ static int software_resume(void)
if (error)
goto Unlock;
+ /*
+ * Check if the hibernation image is compressed. If so, query for
+ * the algorithm support.
+ */
+ if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) {
+ if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4)
+ strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo));
+ else
+ strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO, sizeof(hib_comp_algo));
+ if (crypto_has_comp(hib_comp_algo, 0, 0) != 1) {
+ pr_err("%s compression is not available\n", hib_comp_algo);
+ error = -EOPNOTSUPP;
+ goto Unlock;
+ }
+ }
+
/* The snapshot device should not be opened while we're running */
if (!hibernate_acquire()) {
error = -EBUSY;
@@ -1370,6 +1422,57 @@ static int __init nohibernate_setup(char *str)
return 1;
}
+static const char * const comp_alg_enabled[] = {
+#if IS_ENABLED(CONFIG_CRYPTO_LZO)
+ COMPRESSION_ALGO_LZO,
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_LZ4)
+ COMPRESSION_ALGO_LZ4,
+#endif
+};
+
+static int hibernate_compressor_param_set(const char *compressor,
+ const struct kernel_param *kp)
+{
+ unsigned int sleep_flags;
+ int index, ret;
+
+ sleep_flags = lock_system_sleep();
+
+ index = sysfs_match_string(comp_alg_enabled, compressor);
+ if (index >= 0) {
+ ret = param_set_copystring(comp_alg_enabled[index], kp);
+ if (!ret)
+ strscpy(hib_comp_algo, comp_alg_enabled[index],
+ sizeof(hib_comp_algo));
+ } else {
+ ret = index;
+ }
+
+ unlock_system_sleep(sleep_flags);
+
+ if (ret)
+ pr_debug("Cannot set specified compressor %s\n",
+ compressor);
+
+ return ret;
+}
+
+static const struct kernel_param_ops hibernate_compressor_param_ops = {
+ .set = hibernate_compressor_param_set,
+ .get = param_get_string,
+};
+
+static struct kparam_string hibernate_compressor_param_string = {
+ .maxlen = sizeof(hibernate_compressor),
+ .string = hibernate_compressor,
+};
+
+module_param_cb(compressor, &hibernate_compressor_param_ops,
+ &hibernate_compressor_param_string, 0644);
+MODULE_PARM_DESC(compressor,
+ "Compression algorithm to be used with hibernation");
+
__setup("noresume", noresume_setup);
__setup("resume_offset=", resume_offset_setup);
__setup("resume=", resume_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index b1ae9b677d03..a9e0693aaf69 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -95,19 +95,6 @@ int unregister_pm_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_pm_notifier);
-void pm_report_hw_sleep_time(u64 t)
-{
- suspend_stats.last_hw_sleep = t;
- suspend_stats.total_hw_sleep += t;
-}
-EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
-
-void pm_report_max_hw_sleep(u64 t)
-{
- suspend_stats.max_hw_sleep = t;
-}
-EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep);
-
int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down)
{
int ret;
@@ -319,26 +306,86 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
power_attr(pm_test);
#endif /* CONFIG_PM_SLEEP_DEBUG */
-static char *suspend_step_name(enum suspend_stat_step step)
-{
- switch (step) {
- case SUSPEND_FREEZE:
- return "freeze";
- case SUSPEND_PREPARE:
- return "prepare";
- case SUSPEND_SUSPEND:
- return "suspend";
- case SUSPEND_SUSPEND_NOIRQ:
- return "suspend_noirq";
- case SUSPEND_RESUME_NOIRQ:
- return "resume_noirq";
- case SUSPEND_RESUME:
- return "resume";
- default:
- return "";
+#define SUSPEND_NR_STEPS SUSPEND_RESUME
+#define REC_FAILED_NUM 2
+
+struct suspend_stats {
+ unsigned int step_failures[SUSPEND_NR_STEPS];
+ unsigned int success;
+ unsigned int fail;
+ int last_failed_dev;
+ char failed_devs[REC_FAILED_NUM][40];
+ int last_failed_errno;
+ int errno[REC_FAILED_NUM];
+ int last_failed_step;
+ u64 last_hw_sleep;
+ u64 total_hw_sleep;
+ u64 max_hw_sleep;
+ enum suspend_stat_step failed_steps[REC_FAILED_NUM];
+};
+
+static struct suspend_stats suspend_stats;
+static DEFINE_MUTEX(suspend_stats_lock);
+
+void dpm_save_failed_dev(const char *name)
+{
+ mutex_lock(&suspend_stats_lock);
+
+ strscpy(suspend_stats.failed_devs[suspend_stats.last_failed_dev],
+ name, sizeof(suspend_stats.failed_devs[0]));
+ suspend_stats.last_failed_dev++;
+ suspend_stats.last_failed_dev %= REC_FAILED_NUM;
+
+ mutex_unlock(&suspend_stats_lock);
+}
+
+void dpm_save_failed_step(enum suspend_stat_step step)
+{
+ suspend_stats.step_failures[step-1]++;
+ suspend_stats.failed_steps[suspend_stats.last_failed_step] = step;
+ suspend_stats.last_failed_step++;
+ suspend_stats.last_failed_step %= REC_FAILED_NUM;
+}
+
+void dpm_save_errno(int err)
+{
+ if (!err) {
+ suspend_stats.success++;
+ return;
}
+
+ suspend_stats.fail++;
+
+ suspend_stats.errno[suspend_stats.last_failed_errno] = err;
+ suspend_stats.last_failed_errno++;
+ suspend_stats.last_failed_errno %= REC_FAILED_NUM;
}
+void pm_report_hw_sleep_time(u64 t)
+{
+ suspend_stats.last_hw_sleep = t;
+ suspend_stats.total_hw_sleep += t;
+}
+EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
+
+void pm_report_max_hw_sleep(u64 t)
+{
+ suspend_stats.max_hw_sleep = t;
+}
+EXPORT_SYMBOL_GPL(pm_report_max_hw_sleep);
+
+static const char * const suspend_step_names[] = {
+ [SUSPEND_WORKING] = "",
+ [SUSPEND_FREEZE] = "freeze",
+ [SUSPEND_PREPARE] = "prepare",
+ [SUSPEND_SUSPEND] = "suspend",
+ [SUSPEND_SUSPEND_LATE] = "suspend_late",
+ [SUSPEND_SUSPEND_NOIRQ] = "suspend_noirq",
+ [SUSPEND_RESUME_NOIRQ] = "resume_noirq",
+ [SUSPEND_RESUME_EARLY] = "resume_early",
+ [SUSPEND_RESUME] = "resume",
+};
+
#define suspend_attr(_name, format_str) \
static ssize_t _name##_show(struct kobject *kobj, \
struct kobj_attribute *attr, char *buf) \
@@ -347,20 +394,30 @@ static ssize_t _name##_show(struct kobject *kobj, \
} \
static struct kobj_attribute _name = __ATTR_RO(_name)
-suspend_attr(success, "%d\n");
-suspend_attr(fail, "%d\n");
-suspend_attr(failed_freeze, "%d\n");
-suspend_attr(failed_prepare, "%d\n");
-suspend_attr(failed_suspend, "%d\n");
-suspend_attr(failed_suspend_late, "%d\n");
-suspend_attr(failed_suspend_noirq, "%d\n");
-suspend_attr(failed_resume, "%d\n");
-suspend_attr(failed_resume_early, "%d\n");
-suspend_attr(failed_resume_noirq, "%d\n");
+suspend_attr(success, "%u\n");
+suspend_attr(fail, "%u\n");
suspend_attr(last_hw_sleep, "%llu\n");
suspend_attr(total_hw_sleep, "%llu\n");
suspend_attr(max_hw_sleep, "%llu\n");
+#define suspend_step_attr(_name, step) \
+static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", \
+ suspend_stats.step_failures[step-1]); \
+} \
+static struct kobj_attribute _name = __ATTR_RO(_name)
+
+suspend_step_attr(failed_freeze, SUSPEND_FREEZE);
+suspend_step_attr(failed_prepare, SUSPEND_PREPARE);
+suspend_step_attr(failed_suspend, SUSPEND_SUSPEND);
+suspend_step_attr(failed_suspend_late, SUSPEND_SUSPEND_LATE);
+suspend_step_attr(failed_suspend_noirq, SUSPEND_SUSPEND_NOIRQ);
+suspend_step_attr(failed_resume, SUSPEND_RESUME);
+suspend_step_attr(failed_resume_early, SUSPEND_RESUME_EARLY);
+suspend_step_attr(failed_resume_noirq, SUSPEND_RESUME_NOIRQ);
+
static ssize_t last_failed_dev_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -392,16 +449,14 @@ static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno);
static ssize_t last_failed_step_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- int index;
enum suspend_stat_step step;
- char *last_failed_step = NULL;
+ int index;
index = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
index %= REC_FAILED_NUM;
step = suspend_stats.failed_steps[index];
- last_failed_step = suspend_step_name(step);
- return sprintf(buf, "%s\n", last_failed_step);
+ return sprintf(buf, "%s\n", suspend_step_names[step]);
}
static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
@@ -449,6 +504,7 @@ static const struct attribute_group suspend_attr_group = {
static int suspend_stats_show(struct seq_file *s, void *unused)
{
int i, index, last_dev, last_errno, last_step;
+ enum suspend_stat_step step;
last_dev = suspend_stats.last_failed_dev + REC_FAILED_NUM - 1;
last_dev %= REC_FAILED_NUM;
@@ -456,47 +512,35 @@ static int suspend_stats_show(struct seq_file *s, void *unused)
last_errno %= REC_FAILED_NUM;
last_step = suspend_stats.last_failed_step + REC_FAILED_NUM - 1;
last_step %= REC_FAILED_NUM;
- seq_printf(s, "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n"
- "%s: %d\n%s: %d\n%s: %d\n%s: %d\n%s: %d\n",
- "success", suspend_stats.success,
- "fail", suspend_stats.fail,
- "failed_freeze", suspend_stats.failed_freeze,
- "failed_prepare", suspend_stats.failed_prepare,
- "failed_suspend", suspend_stats.failed_suspend,
- "failed_suspend_late",
- suspend_stats.failed_suspend_late,
- "failed_suspend_noirq",
- suspend_stats.failed_suspend_noirq,
- "failed_resume", suspend_stats.failed_resume,
- "failed_resume_early",
- suspend_stats.failed_resume_early,
- "failed_resume_noirq",
- suspend_stats.failed_resume_noirq);
+
+ seq_printf(s, "success: %u\nfail: %u\n",
+ suspend_stats.success, suspend_stats.fail);
+
+ for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++)
+ seq_printf(s, "failed_%s: %u\n", suspend_step_names[step],
+ suspend_stats.step_failures[step-1]);
+
seq_printf(s, "failures:\n last_failed_dev:\t%-s\n",
- suspend_stats.failed_devs[last_dev]);
+ suspend_stats.failed_devs[last_dev]);
for (i = 1; i < REC_FAILED_NUM; i++) {
index = last_dev + REC_FAILED_NUM - i;
index %= REC_FAILED_NUM;
- seq_printf(s, "\t\t\t%-s\n",
- suspend_stats.failed_devs[index]);
+ seq_printf(s, "\t\t\t%-s\n", suspend_stats.failed_devs[index]);
}
seq_printf(s, " last_failed_errno:\t%-d\n",
suspend_stats.errno[last_errno]);
for (i = 1; i < REC_FAILED_NUM; i++) {
index = last_errno + REC_FAILED_NUM - i;
index %= REC_FAILED_NUM;
- seq_printf(s, "\t\t\t%-d\n",
- suspend_stats.errno[index]);
+ seq_printf(s, "\t\t\t%-d\n", suspend_stats.errno[index]);
}
seq_printf(s, " last_failed_step:\t%-s\n",
- suspend_step_name(
- suspend_stats.failed_steps[last_step]));
+ suspend_step_names[suspend_stats.failed_steps[last_step]]);
for (i = 1; i < REC_FAILED_NUM; i++) {
index = last_step + REC_FAILED_NUM - i;
index %= REC_FAILED_NUM;
seq_printf(s, "\t\t\t%-s\n",
- suspend_step_name(
- suspend_stats.failed_steps[index]));
+ suspend_step_names[suspend_stats.failed_steps[index]]);
}
return 0;
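The reworked statistics keep one failure counter per step, indexed by step - 1 so that SUSPEND_FREEZE (1) lands in slot 0, and the old switch-based name lookup becomes a string table indexed directly by the enum. A standalone sketch of that indexing, reusing the enum and names from the hunks above:

#include <stdio.h>

enum suspend_stat_step {
        SUSPEND_WORKING = 0,
        SUSPEND_FREEZE,
        SUSPEND_PREPARE,
        SUSPEND_SUSPEND,
        SUSPEND_SUSPEND_LATE,
        SUSPEND_SUSPEND_NOIRQ,
        SUSPEND_RESUME_NOIRQ,
        SUSPEND_RESUME_EARLY,
        SUSPEND_RESUME
};

#define SUSPEND_NR_STEPS SUSPEND_RESUME

static const char * const suspend_step_names[] = {
        [SUSPEND_WORKING] = "",
        [SUSPEND_FREEZE] = "freeze",
        [SUSPEND_PREPARE] = "prepare",
        [SUSPEND_SUSPEND] = "suspend",
        [SUSPEND_SUSPEND_LATE] = "suspend_late",
        [SUSPEND_SUSPEND_NOIRQ] = "suspend_noirq",
        [SUSPEND_RESUME_NOIRQ] = "resume_noirq",
        [SUSPEND_RESUME_EARLY] = "resume_early",
        [SUSPEND_RESUME] = "resume",
};

static unsigned int step_failures[SUSPEND_NR_STEPS];

static void save_failed_step(enum suspend_stat_step step)
{
        step_failures[step - 1]++;      /* SUSPEND_FREEZE (1) lands in slot 0 */
}

int main(void)
{
        enum suspend_stat_step step;

        save_failed_step(SUSPEND_FREEZE);
        save_failed_step(SUSPEND_RESUME_NOIRQ);

        for (step = SUSPEND_FREEZE; step <= SUSPEND_NR_STEPS; step++)
                printf("failed_%s: %u\n", suspend_step_names[step],
                       step_failures[step - 1]);
        return 0;
}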
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 8499a39c62f4..de0e6b1077f2 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -6,6 +6,7 @@
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
+#include <linux/crypto.h>
struct swsusp_info {
struct new_utsname uts;
@@ -54,6 +55,10 @@ asmlinkage int swsusp_save(void);
/* kernel/power/hibernate.c */
extern bool freezer_test_done;
+extern char hib_comp_algo[CRYPTO_MAX_ALG_NAME];
+
+/* kernel/power/swap.c */
+extern unsigned int swsusp_header_flags;
extern int hibernation_snapshot(int platform_mode);
extern int hibernation_restore(int platform_mode);
@@ -148,7 +153,7 @@ extern unsigned int snapshot_additional_pages(struct zone *zone);
extern unsigned long snapshot_get_image_size(void);
extern int snapshot_read_next(struct snapshot_handle *handle);
extern int snapshot_write_next(struct snapshot_handle *handle);
-extern void snapshot_write_finalize(struct snapshot_handle *handle);
+int snapshot_write_finalize(struct snapshot_handle *handle);
extern int snapshot_image_loaded(struct snapshot_handle *handle);
extern bool hibernate_acquire(void);
@@ -162,11 +167,25 @@ extern int swsusp_swap_in_use(void);
 * Flags that can be passed from the hibernating kernel to the "boot" kernel in
* the image header.
*/
+#define SF_COMPRESSION_ALG_LZO 0 /* dummy, details given below */
#define SF_PLATFORM_MODE 1
#define SF_NOCOMPRESS_MODE 2
#define SF_CRC32_MODE 4
#define SF_HW_SIG 8
+/*
+ * Bit to indicate the compression algorithm to be used (for LZ4). The same
+ * can be checked while saving/loading the image to/from disk to select the
+ * corresponding algorithm.
+ *
+ * By default, LZO compression is enabled if SF_CRC32_MODE is set. Use
+ * SF_COMPRESSION_ALG_LZ4 to override this behaviour and use LZ4.
+ *
+ * SF_CRC32_MODE, SF_COMPRESSION_ALG_LZO(dummy) -> Compression, LZO
+ * SF_CRC32_MODE, SF_COMPRESSION_ALG_LZ4 -> Compression, LZ4
+ */
+#define SF_COMPRESSION_ALG_LZ4 16
+
/* kernel/power/hibernate.c */
int swsusp_check(bool exclusive);
extern void swsusp_free(void);
@@ -327,3 +346,5 @@ static inline void pm_sleep_enable_secondary_cpus(void)
suspend_enable_secondary_cpus();
cpuidle_resume();
}
+
+void dpm_save_errno(int err);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5c96ff067c64..405eddbda4fc 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -58,22 +58,24 @@ static inline void hibernate_restore_protection_end(void)
hibernate_restore_protection_active = false;
}
-static inline void hibernate_restore_protect_page(void *page_address)
+static inline int __must_check hibernate_restore_protect_page(void *page_address)
{
if (hibernate_restore_protection_active)
- set_memory_ro((unsigned long)page_address, 1);
+ return set_memory_ro((unsigned long)page_address, 1);
+ return 0;
}
-static inline void hibernate_restore_unprotect_page(void *page_address)
+static inline int hibernate_restore_unprotect_page(void *page_address)
{
if (hibernate_restore_protection_active)
- set_memory_rw((unsigned long)page_address, 1);
+ return set_memory_rw((unsigned long)page_address, 1);
+ return 0;
}
#else
static inline void hibernate_restore_protection_begin(void) {}
static inline void hibernate_restore_protection_end(void) {}
-static inline void hibernate_restore_protect_page(void *page_address) {}
-static inline void hibernate_restore_unprotect_page(void *page_address) {}
+static inline int __must_check hibernate_restore_protect_page(void *page_address) { return 0; }
+static inline int hibernate_restore_unprotect_page(void *page_address) { return 0; }
#endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */
@@ -2832,7 +2834,9 @@ next:
}
} else {
copy_last_highmem_page();
- hibernate_restore_protect_page(handle->buffer);
+ error = hibernate_restore_protect_page(handle->buffer);
+ if (error)
+ return error;
handle->buffer = get_buffer(&orig_bm, &ca);
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
@@ -2858,15 +2862,18 @@ next:
* stored in highmem. Additionally, it recycles bitmap memory that's not
* necessary any more.
*/
-void snapshot_write_finalize(struct snapshot_handle *handle)
+int snapshot_write_finalize(struct snapshot_handle *handle)
{
+ int error;
+
copy_last_highmem_page();
- hibernate_restore_protect_page(handle->buffer);
+ error = hibernate_restore_protect_page(handle->buffer);
/* Do that only if we have loaded the image entirely */
if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
memory_bm_recycle(&orig_bm);
free_highmem_data();
}
+ return error;
}
int snapshot_image_loaded(struct snapshot_handle *handle)
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index fa3bf161d13f..e3ae93bbcb9b 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -192,6 +192,7 @@ static int __init mem_sleep_default_setup(char *str)
if (mem_sleep_labels[state] &&
!strcmp(str, mem_sleep_labels[state])) {
mem_sleep_default = state;
+ mem_sleep_current = state;
break;
}
@@ -367,7 +368,6 @@ static int suspend_prepare(suspend_state_t state)
if (!error)
return 0;
- suspend_stats.failed_freeze++;
dpm_save_failed_step(SUSPEND_FREEZE);
pm_notifier_call_chain(PM_POST_SUSPEND);
Restore:
@@ -617,12 +617,7 @@ int pm_suspend(suspend_state_t state)
pr_info("suspend entry (%s)\n", mem_sleep_labels[state]);
error = enter_state(state);
- if (error) {
- suspend_stats.fail++;
- dpm_save_failed_errno(error);
- } else {
- suspend_stats.success++;
- }
+ dpm_save_errno(error);
pr_info("suspend exit\n");
return error;
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 6053ddddaf65..364342cc7f2d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -23,7 +23,6 @@
#include <linux/swapops.h>
#include <linux/pm.h>
#include <linux/slab.h>
-#include <linux/lzo.h>
#include <linux/vmalloc.h>
#include <linux/cpumask.h>
#include <linux/atomic.h>
@@ -339,6 +338,13 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
return error;
}
+/*
+ * Holds the swsusp_header flags. This is used in software_resume() in
+ * 'kernel/power/hibernate.c' to check if the image is compressed and, if so,
+ * to query for the compression algorithm support.
+ */
+unsigned int swsusp_header_flags;
+
/**
* swsusp_swap_check - check if the resume device is a swap device
* and get its index (if so)
@@ -514,25 +520,30 @@ static int swap_writer_finish(struct swap_map_handle *handle,
return error;
}
+/*
+ * Bytes we need for compressed data in the worst case. We assume (as a
+ * limitation) this is the worst case across all supported compression algorithms.
+ */
+#define bytes_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2)
+
/* We need to remember how much compressed data we need to read. */
-#define LZO_HEADER sizeof(size_t)
+#define CMP_HEADER sizeof(size_t)
/* Number of pages/bytes we'll compress at one time. */
-#define LZO_UNC_PAGES 32
-#define LZO_UNC_SIZE (LZO_UNC_PAGES * PAGE_SIZE)
+#define UNC_PAGES 32
+#define UNC_SIZE (UNC_PAGES * PAGE_SIZE)
-/* Number of pages/bytes we need for compressed data (worst case). */
-#define LZO_CMP_PAGES DIV_ROUND_UP(lzo1x_worst_compress(LZO_UNC_SIZE) + \
- LZO_HEADER, PAGE_SIZE)
-#define LZO_CMP_SIZE (LZO_CMP_PAGES * PAGE_SIZE)
+/* Number of pages we need for compressed data (worst case). */
+#define CMP_PAGES DIV_ROUND_UP(bytes_worst_compress(UNC_SIZE) + \
+ CMP_HEADER, PAGE_SIZE)
+#define CMP_SIZE (CMP_PAGES * PAGE_SIZE)
/* Maximum number of threads for compression/decompression. */
-#define LZO_THREADS 3
+#define CMP_THREADS 3
/* Minimum/maximum number of pages for read buffering. */
-#define LZO_MIN_RD_PAGES 1024
-#define LZO_MAX_RD_PAGES 8192
-
+#define CMP_MIN_RD_PAGES 1024
+#define CMP_MAX_RD_PAGES 8192
/**
* save_image - save the suspend image data
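As a back-of-the-envelope check of the renamed constants above (not part of the patch), with 4 KiB pages and sizeof(size_t) == 8:

	/*
	 * UNC_SIZE                       = 32 * 4096                      = 131072
	 * bytes_worst_compress(UNC_SIZE) = 131072 + 8192 + 64 + 3 + 2     = 139333
	 * CMP_PAGES                      = DIV_ROUND_UP(139333 + 8, 4096) = 35
	 * CMP_SIZE                       = 35 * 4096                      = 143360
	 */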
@@ -593,8 +604,8 @@ struct crc_data {
wait_queue_head_t go; /* start crc update */
wait_queue_head_t done; /* crc update done */
u32 *crc32; /* points to handle's crc32 */
- size_t *unc_len[LZO_THREADS]; /* uncompressed lengths */
- unsigned char *unc[LZO_THREADS]; /* uncompressed data */
+ size_t *unc_len[CMP_THREADS]; /* uncompressed lengths */
+ unsigned char *unc[CMP_THREADS]; /* uncompressed data */
};
/*
@@ -625,10 +636,11 @@ static int crc32_threadfn(void *data)
return 0;
}
/*
- * Structure used for LZO data compression.
+ * Structure used for data compression.
*/
struct cmp_data {
struct task_struct *thr; /* thread */
+ struct crypto_comp *cc; /* crypto compressor stream */
atomic_t ready; /* ready to start flag */
atomic_t stop; /* ready to stop flag */
int ret; /* return code */
@@ -636,17 +648,20 @@ struct cmp_data {
wait_queue_head_t done; /* compression done */
size_t unc_len; /* uncompressed length */
size_t cmp_len; /* compressed length */
- unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
- unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
- unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
+ unsigned char unc[UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[CMP_SIZE]; /* compressed buffer */
};
+/* Indicates the image size after compression */
+static atomic_t compressed_size = ATOMIC_INIT(0);
+
/*
* Compression function that runs in its own thread.
*/
-static int lzo_compress_threadfn(void *data)
+static int compress_threadfn(void *data)
{
struct cmp_data *d = data;
+ unsigned int cmp_len = 0;
while (1) {
wait_event(d->go, atomic_read_acquire(&d->ready) ||
@@ -660,9 +675,13 @@ static int lzo_compress_threadfn(void *data)
}
atomic_set(&d->ready, 0);
- d->ret = lzo1x_1_compress(d->unc, d->unc_len,
- d->cmp + LZO_HEADER, &d->cmp_len,
- d->wrk);
+ cmp_len = CMP_SIZE - CMP_HEADER;
+ d->ret = crypto_comp_compress(d->cc, d->unc, d->unc_len,
+ d->cmp + CMP_HEADER,
+ &cmp_len);
+ d->cmp_len = cmp_len;
+
+ atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len);
atomic_set_release(&d->stop, 1);
wake_up(&d->done);
}
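The compression threads now go through the kernel's generic crypto "comp" interface instead of calling lzo1x_1_compress() directly, so the same code path can serve both lzo and lz4. A condensed sketch of the allocate/compress/free pattern used here; compress_one_block() is a hypothetical helper, not part of the patch:

	#include <linux/crypto.h>

	static int compress_one_block(const char *algo, const u8 *src, unsigned int slen,
				      u8 *dst, unsigned int *dlen)
	{
		struct crypto_comp *cc;
		int ret;

		cc = crypto_alloc_comp(algo, 0, 0);
		if (IS_ERR_OR_NULL(cc))
			return -EFAULT;

		/* *dlen is in/out: buffer size on entry, compressed length on return. */
		ret = crypto_comp_compress(cc, src, slen, dst, dlen);

		crypto_free_comp(cc);
		return ret;
	}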
@@ -670,14 +689,14 @@ static int lzo_compress_threadfn(void *data)
}
/**
- * save_image_lzo - Save the suspend image data compressed with LZO.
+ * save_compressed_image - Save the suspend image data after compression.
* @handle: Swap map handle to use for saving the image.
* @snapshot: Image to read data from.
* @nr_to_write: Number of pages to save.
*/
-static int save_image_lzo(struct swap_map_handle *handle,
- struct snapshot_handle *snapshot,
- unsigned int nr_to_write)
+static int save_compressed_image(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_write)
{
unsigned int m;
int ret = 0;
@@ -694,23 +713,25 @@ static int save_image_lzo(struct swap_map_handle *handle,
hib_init_batch(&hb);
+ atomic_set(&compressed_size, 0);
+
/*
* We'll limit the number of threads for compression to limit memory
* footprint.
*/
nr_threads = num_online_cpus() - 1;
- nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+ nr_threads = clamp_val(nr_threads, 1, CMP_THREADS);
page = (void *)__get_free_page(GFP_NOIO | __GFP_HIGH);
if (!page) {
- pr_err("Failed to allocate LZO page\n");
+ pr_err("Failed to allocate %s page\n", hib_comp_algo);
ret = -ENOMEM;
goto out_clean;
}
data = vzalloc(array_size(nr_threads, sizeof(*data)));
if (!data) {
- pr_err("Failed to allocate LZO data\n");
+ pr_err("Failed to allocate %s data\n", hib_comp_algo);
ret = -ENOMEM;
goto out_clean;
}
@@ -729,7 +750,14 @@ static int save_image_lzo(struct swap_map_handle *handle,
init_waitqueue_head(&data[thr].go);
init_waitqueue_head(&data[thr].done);
- data[thr].thr = kthread_run(lzo_compress_threadfn,
+ data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0);
+ if (IS_ERR_OR_NULL(data[thr].cc)) {
+ pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc));
+ ret = -EFAULT;
+ goto out_clean;
+ }
+
+ data[thr].thr = kthread_run(compress_threadfn,
&data[thr],
"image_compress/%u", thr);
if (IS_ERR(data[thr].thr)) {
@@ -767,7 +795,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
*/
handle->reqd_free_pages = reqd_free_pages();
- pr_info("Using %u thread(s) for compression\n", nr_threads);
+ pr_info("Using %u thread(s) for %s compression\n", nr_threads, hib_comp_algo);
pr_info("Compressing and saving image data (%u pages)...\n",
nr_to_write);
m = nr_to_write / 10;
@@ -777,7 +805,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
start = ktime_get();
for (;;) {
for (thr = 0; thr < nr_threads; thr++) {
- for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) {
+ for (off = 0; off < UNC_SIZE; off += PAGE_SIZE) {
ret = snapshot_read_next(snapshot);
if (ret < 0)
goto out_finish;
@@ -817,14 +845,14 @@ static int save_image_lzo(struct swap_map_handle *handle,
ret = data[thr].ret;
if (ret < 0) {
- pr_err("LZO compression failed\n");
+ pr_err("%s compression failed\n", hib_comp_algo);
goto out_finish;
}
if (unlikely(!data[thr].cmp_len ||
data[thr].cmp_len >
- lzo1x_worst_compress(data[thr].unc_len))) {
- pr_err("Invalid LZO compressed length\n");
+ bytes_worst_compress(data[thr].unc_len))) {
+ pr_err("Invalid %s compressed length\n", hib_comp_algo);
ret = -1;
goto out_finish;
}
@@ -840,7 +868,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
* read it.
*/
for (off = 0;
- off < LZO_HEADER + data[thr].cmp_len;
+ off < CMP_HEADER + data[thr].cmp_len;
off += PAGE_SIZE) {
memcpy(page, data[thr].cmp + off, PAGE_SIZE);
@@ -862,6 +890,9 @@ out_finish:
if (!ret)
pr_info("Image saving done\n");
swsusp_show_speed(start, stop, nr_to_write, "Wrote");
+ pr_info("Image size after compression: %d kbytes\n",
+ (atomic_read(&compressed_size) / 1024));
+
out_clean:
hib_finish_batch(&hb);
if (crc) {
@@ -870,9 +901,12 @@ out_clean:
kfree(crc);
}
if (data) {
- for (thr = 0; thr < nr_threads; thr++)
+ for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
+ if (data[thr].cc)
+ crypto_free_comp(data[thr].cc);
+ }
vfree(data);
}
if (page) free_page((unsigned long)page);
@@ -942,7 +976,7 @@ int swsusp_write(unsigned int flags)
if (!error) {
error = (flags & SF_NOCOMPRESS_MODE) ?
save_image(&handle, &snapshot, pages - 1) :
- save_image_lzo(&handle, &snapshot, pages - 1);
+ save_compressed_image(&handle, &snapshot, pages - 1);
}
out_finish:
error = swap_writer_finish(&handle, flags, error);
@@ -1100,8 +1134,8 @@ static int load_image(struct swap_map_handle *handle,
ret = err2;
if (!ret) {
pr_info("Image loading done\n");
- snapshot_write_finalize(snapshot);
- if (!snapshot_image_loaded(snapshot))
+ ret = snapshot_write_finalize(snapshot);
+ if (!ret && !snapshot_image_loaded(snapshot))
ret = -ENODATA;
}
swsusp_show_speed(start, stop, nr_to_read, "Read");
@@ -1109,10 +1143,11 @@ static int load_image(struct swap_map_handle *handle,
}
/*
- * Structure used for LZO data decompression.
+ * Structure used for data decompression.
*/
struct dec_data {
struct task_struct *thr; /* thread */
+ struct crypto_comp *cc; /* crypto compressor stream */
atomic_t ready; /* ready to start flag */
atomic_t stop; /* ready to stop flag */
int ret; /* return code */
@@ -1120,16 +1155,17 @@ struct dec_data {
wait_queue_head_t done; /* decompression done */
size_t unc_len; /* uncompressed length */
size_t cmp_len; /* compressed length */
- unsigned char unc[LZO_UNC_SIZE]; /* uncompressed buffer */
- unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
+ unsigned char unc[UNC_SIZE]; /* uncompressed buffer */
+ unsigned char cmp[CMP_SIZE]; /* compressed buffer */
};
/*
* Decompression function that runs in its own thread.
*/
-static int lzo_decompress_threadfn(void *data)
+static int decompress_threadfn(void *data)
{
struct dec_data *d = data;
+ unsigned int unc_len = 0;
while (1) {
wait_event(d->go, atomic_read_acquire(&d->ready) ||
@@ -1143,9 +1179,11 @@ static int lzo_decompress_threadfn(void *data)
}
atomic_set(&d->ready, 0);
- d->unc_len = LZO_UNC_SIZE;
- d->ret = lzo1x_decompress_safe(d->cmp + LZO_HEADER, d->cmp_len,
- d->unc, &d->unc_len);
+ unc_len = UNC_SIZE;
+ d->ret = crypto_comp_decompress(d->cc, d->cmp + CMP_HEADER, d->cmp_len,
+ d->unc, &unc_len);
+ d->unc_len = unc_len;
+
if (clean_pages_on_decompress)
flush_icache_range((unsigned long)d->unc,
(unsigned long)d->unc + d->unc_len);
@@ -1157,14 +1195,14 @@ static int lzo_decompress_threadfn(void *data)
}
/**
- * load_image_lzo - Load compressed image data and decompress them with LZO.
+ * load_compressed_image - Load compressed image data and decompress it.
* @handle: Swap map handle to use for loading data.
* @snapshot: Image to copy uncompressed data into.
* @nr_to_read: Number of pages to load.
*/
-static int load_image_lzo(struct swap_map_handle *handle,
- struct snapshot_handle *snapshot,
- unsigned int nr_to_read)
+static int load_compressed_image(struct swap_map_handle *handle,
+ struct snapshot_handle *snapshot,
+ unsigned int nr_to_read)
{
unsigned int m;
int ret = 0;
@@ -1189,18 +1227,18 @@ static int load_image_lzo(struct swap_map_handle *handle,
* footprint.
*/
nr_threads = num_online_cpus() - 1;
- nr_threads = clamp_val(nr_threads, 1, LZO_THREADS);
+ nr_threads = clamp_val(nr_threads, 1, CMP_THREADS);
- page = vmalloc(array_size(LZO_MAX_RD_PAGES, sizeof(*page)));
+ page = vmalloc(array_size(CMP_MAX_RD_PAGES, sizeof(*page)));
if (!page) {
- pr_err("Failed to allocate LZO page\n");
+ pr_err("Failed to allocate %s page\n", hib_comp_algo);
ret = -ENOMEM;
goto out_clean;
}
data = vzalloc(array_size(nr_threads, sizeof(*data)));
if (!data) {
- pr_err("Failed to allocate LZO data\n");
+ pr_err("Failed to allocate %s data\n", hib_comp_algo);
ret = -ENOMEM;
goto out_clean;
}
@@ -1221,7 +1259,14 @@ static int load_image_lzo(struct swap_map_handle *handle,
init_waitqueue_head(&data[thr].go);
init_waitqueue_head(&data[thr].done);
- data[thr].thr = kthread_run(lzo_decompress_threadfn,
+ data[thr].cc = crypto_alloc_comp(hib_comp_algo, 0, 0);
+ if (IS_ERR_OR_NULL(data[thr].cc)) {
+ pr_err("Could not allocate comp stream %ld\n", PTR_ERR(data[thr].cc));
+ ret = -EFAULT;
+ goto out_clean;
+ }
+
+ data[thr].thr = kthread_run(decompress_threadfn,
&data[thr],
"image_decompress/%u", thr);
if (IS_ERR(data[thr].thr)) {
@@ -1262,18 +1307,18 @@ static int load_image_lzo(struct swap_map_handle *handle,
*/
if (low_free_pages() > snapshot_get_image_size())
read_pages = (low_free_pages() - snapshot_get_image_size()) / 2;
- read_pages = clamp_val(read_pages, LZO_MIN_RD_PAGES, LZO_MAX_RD_PAGES);
+ read_pages = clamp_val(read_pages, CMP_MIN_RD_PAGES, CMP_MAX_RD_PAGES);
for (i = 0; i < read_pages; i++) {
- page[i] = (void *)__get_free_page(i < LZO_CMP_PAGES ?
+ page[i] = (void *)__get_free_page(i < CMP_PAGES ?
GFP_NOIO | __GFP_HIGH :
GFP_NOIO | __GFP_NOWARN |
__GFP_NORETRY);
if (!page[i]) {
- if (i < LZO_CMP_PAGES) {
+ if (i < CMP_PAGES) {
ring_size = i;
- pr_err("Failed to allocate LZO pages\n");
+ pr_err("Failed to allocate %s pages\n", hib_comp_algo);
ret = -ENOMEM;
goto out_clean;
} else {
@@ -1283,7 +1328,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
want = ring_size = i;
- pr_info("Using %u thread(s) for decompression\n", nr_threads);
+ pr_info("Using %u thread(s) for %s decompression\n", nr_threads, hib_comp_algo);
pr_info("Loading and decompressing image data (%u pages)...\n",
nr_to_read);
m = nr_to_read / 10;
@@ -1344,13 +1389,13 @@ static int load_image_lzo(struct swap_map_handle *handle,
data[thr].cmp_len = *(size_t *)page[pg];
if (unlikely(!data[thr].cmp_len ||
data[thr].cmp_len >
- lzo1x_worst_compress(LZO_UNC_SIZE))) {
- pr_err("Invalid LZO compressed length\n");
+ bytes_worst_compress(UNC_SIZE))) {
+ pr_err("Invalid %s compressed length\n", hib_comp_algo);
ret = -1;
goto out_finish;
}
- need = DIV_ROUND_UP(data[thr].cmp_len + LZO_HEADER,
+ need = DIV_ROUND_UP(data[thr].cmp_len + CMP_HEADER,
PAGE_SIZE);
if (need > have) {
if (eof > 1) {
@@ -1361,7 +1406,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
}
for (off = 0;
- off < LZO_HEADER + data[thr].cmp_len;
+ off < CMP_HEADER + data[thr].cmp_len;
off += PAGE_SIZE) {
memcpy(data[thr].cmp + off,
page[pg], PAGE_SIZE);
@@ -1378,7 +1423,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
/*
* Wait for more data while we are decompressing.
*/
- if (have < LZO_CMP_PAGES && asked) {
+ if (have < CMP_PAGES && asked) {
ret = hib_wait_io(&hb);
if (ret)
goto out_finish;
@@ -1396,14 +1441,14 @@ static int load_image_lzo(struct swap_map_handle *handle,
ret = data[thr].ret;
if (ret < 0) {
- pr_err("LZO decompression failed\n");
+ pr_err("%s decompression failed\n", hib_comp_algo);
goto out_finish;
}
if (unlikely(!data[thr].unc_len ||
- data[thr].unc_len > LZO_UNC_SIZE ||
- data[thr].unc_len & (PAGE_SIZE - 1))) {
- pr_err("Invalid LZO uncompressed length\n");
+ data[thr].unc_len > UNC_SIZE ||
+ data[thr].unc_len & (PAGE_SIZE - 1))) {
+ pr_err("Invalid %s uncompressed length\n", hib_comp_algo);
ret = -1;
goto out_finish;
}
@@ -1441,8 +1486,8 @@ out_finish:
stop = ktime_get();
if (!ret) {
pr_info("Image loading done\n");
- snapshot_write_finalize(snapshot);
- if (!snapshot_image_loaded(snapshot))
+ ret = snapshot_write_finalize(snapshot);
+ if (!ret && !snapshot_image_loaded(snapshot))
ret = -ENODATA;
if (!ret) {
if (swsusp_header->flags & SF_CRC32_MODE) {
@@ -1464,9 +1509,12 @@ out_clean:
kfree(crc);
}
if (data) {
- for (thr = 0; thr < nr_threads; thr++)
+ for (thr = 0; thr < nr_threads; thr++) {
if (data[thr].thr)
kthread_stop(data[thr].thr);
+ if (data[thr].cc)
+ crypto_free_comp(data[thr].cc);
+ }
vfree(data);
}
vfree(page);
@@ -1500,7 +1548,7 @@ int swsusp_read(unsigned int *flags_p)
if (!error) {
error = (*flags_p & SF_NOCOMPRESS_MODE) ?
load_image(&handle, &snapshot, header->pages - 1) :
- load_image_lzo(&handle, &snapshot, header->pages - 1);
+ load_compressed_image(&handle, &snapshot, header->pages - 1);
}
swap_reader_finish(&handle);
end:
@@ -1535,6 +1583,7 @@ int swsusp_check(bool exclusive)
if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
+ swsusp_header_flags = swsusp_header->flags;
/* Reset swap signature now */
error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC,
swsusp_resume_block,
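swsusp_header_flags is exported so that the resume path can learn, before any image data is read, whether the image is compressed and verify that the selected algorithm is usable. A hedged sketch of the kind of check software_resume() can now perform; the real check lives in kernel/power/hibernate.c, which is outside this excerpt, and may be structured differently:

	/* Sketch only: refuse to resume if the needed algorithm is unavailable. */
	if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE) &&
	    !crypto_has_comp(hib_comp_algo, 0, 0)) {
		pr_err("%s compression is not available\n", hib_comp_algo);
		return -EOPNOTSUPP;
	}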
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 3a4e70366f35..3aa41ba22129 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -317,7 +317,9 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
break;
case SNAPSHOT_ATOMIC_RESTORE:
- snapshot_write_finalize(&data->handle);
+ error = snapshot_write_finalize(&data->handle);
+ if (error)
+ break;
if (data->mode != O_WRONLY || !data->frozen ||
!snapshot_image_loaded(&data->handle)) {
error = -EPERM;
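For userspace, the visible effect of this hunk is that SNAPSHOT_ATOMIC_RESTORE on /dev/snapshot can now fail with the error from snapshot_write_finalize() instead of that error being ignored. A minimal caller-side sketch using the ioctl number from <linux/suspend_ioctls.h>; atomic_restore() is a hypothetical wrapper:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/suspend_ioctls.h>

	static int atomic_restore(int snapshot_fd)
	{
		if (ioctl(snapshot_fd, SNAPSHOT_ATOMIC_RESTORE, 0) < 0) {
			perror("SNAPSHOT_ATOMIC_RESTORE");
			return -1;
		}
		/* On success this call does not return: the restored kernel takes over. */
		return 0;
	}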
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 7f7b67fe1b65..068c16e52dff 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -612,7 +612,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_power_up_pm);
int snd_hdac_keep_power_up(struct hdac_device *codec)
{
if (!atomic_inc_not_zero(&codec->in_pm)) {
- int ret = pm_runtime_get_if_active(&codec->dev, true);
+ int ret = pm_runtime_get_if_active(&codec->dev);
if (!ret)
return -1;
if (ret < 0)