-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt                |   7
-rw-r--r--  Documentation/admin-guide/pm/amd-pstate.rst                    |  78
-rw-r--r--  Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml |   5
-rw-r--r--  Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml | 81
-rw-r--r--  Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml     |  16
-rw-r--r--  arch/mips/include/asm/mach-loongson32/cpufreq.h                |  18
-rw-r--r--  arch/mips/include/asm/mach-loongson32/platform.h               |   1
-rw-r--r--  arch/mips/loongson32/common/platform.c                         |  16
-rw-r--r--  arch/mips/loongson32/ls1b/board.c                              |   1
-rw-r--r--  arch/x86/kernel/process.c                                      |   1
-rw-r--r--  drivers/acpi/cppc_acpi.c                                       |  67
-rw-r--r--  drivers/base/power/domain.c                                    |   5
-rw-r--r--  drivers/base/power/runtime.c                                   |   4
-rw-r--r--  drivers/cpufreq/Kconfig                                        |  10
-rw-r--r--  drivers/cpufreq/Makefile                                       |   1
-rw-r--r--  drivers/cpufreq/amd-pstate.c                                   | 705
-rw-r--r--  drivers/cpufreq/brcmstb-avs-cpufreq.c                          |   5
-rw-r--r--  drivers/cpufreq/cpufreq.c                                      |  10
-rw-r--r--  drivers/cpufreq/davinci-cpufreq.c                              |   4
-rw-r--r--  drivers/cpufreq/loongson1-cpufreq.c                            | 222
-rw-r--r--  drivers/cpufreq/mediatek-cpufreq-hw.c                          |   5
-rw-r--r--  drivers/cpufreq/omap-cpufreq.c                                 |   4
-rw-r--r--  drivers/cpufreq/qcom-cpufreq-hw.c                              |   6
-rw-r--r--  drivers/cpufreq/tegra194-cpufreq.c                             |   3
-rw-r--r--  drivers/cpuidle/Kconfig                                        |   1
-rw-r--r--  drivers/cpuidle/Kconfig.arm                                    |  10
-rw-r--r--  drivers/cpuidle/cpuidle-haltpoll.c                             |   2
-rw-r--r--  drivers/cpuidle/cpuidle-psci-domain.c                          |   7
-rw-r--r--  drivers/cpuidle/cpuidle-psci.c                                 |   3
-rw-r--r--  drivers/cpuidle/driver.c                                       |   4
-rw-r--r--  drivers/cpuidle/governors/teo.c                                | 102
-rw-r--r--  drivers/cpuidle/sysfs.c                                        |   6
-rw-r--r--  drivers/idle/intel_idle.c                                      |   2
-rw-r--r--  drivers/powercap/idle_inject.c                                 |   6
-rw-r--r--  drivers/powercap/intel_rapl_common.c                           |  13
-rw-r--r--  drivers/powercap/powercap_sys.c                                |  14
-rw-r--r--  include/acpi/cppc_acpi.h                                       |  12
-rw-r--r--  include/linux/amd-pstate.h                                     |  32
-rw-r--r--  include/linux/cpufreq.h                                        |   2
-rw-r--r--  include/linux/pm.h                                             |   4
-rw-r--r--  kernel/power/Kconfig                                           |   1
-rw-r--r--  kernel/power/energy_model.c                                    |   5
-rw-r--r--  kernel/power/swap.c                                            |  16
43 files changed, 1146 insertions(+), 371 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6cfa6e3996cf..e3618dfdb36a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7020,3 +7020,10 @@
management firmware translates the requests into actual
hardware states (core frequency, data fabric and memory
clocks etc.)
+			active
+			  Use the amd_pstate_epp driver instance as the scaling
+			  driver. The driver provides a hint to the CPPC firmware
+			  indicating whether software wants to bias toward
+			  performance (0x0) or energy efficiency (0xff). The CPPC
+			  power algorithm then calculates the runtime workload and
+			  adjusts the real-time core frequency.
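
As an aside, not part of the patch: on MSR-based systems the hint byte
described above is encoded into bits 31:24 of MSR_AMD_CPPC_REQ, which is what
the amd-pstate changes later in this diff do with GENMASK_ULL(31, 24). A
minimal sketch of that encoding, assuming only the helpers from
<linux/bits.h> (the helper name here is hypothetical):

    #include <linux/bits.h>
    #include <linux/types.h>

    /* Encode an EPP hint (0x0 = performance ... 0xff = energy efficiency)
     * into a cached MSR_AMD_CPPC_REQ value; EPP occupies bits 31:24. */
    static inline u64 cppc_req_set_epp(u64 req, u8 epp)
    {
            req &= ~GENMASK_ULL(31, 24);    /* clear the old EPP field */
            req |= (u64)epp << 24;          /* insert the new hint */
            return req;
    }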
diff --git a/Documentation/admin-guide/pm/amd-pstate.rst b/Documentation/admin-guide/pm/amd-pstate.rst
index 5376d53faaa8..d143e72cf93e 100644
--- a/Documentation/admin-guide/pm/amd-pstate.rst
+++ b/Documentation/admin-guide/pm/amd-pstate.rst
@@ -230,8 +230,8 @@ with :c:macro:`MSR_AMD_CPPC_ENABLE` or ``cppc_set_enable``, it will respond
to the request from AMD P-States.
-User Space Interface in ``sysfs``
-==================================
+User Space Interface in ``sysfs`` - Per-policy control
+======================================================
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
control its functionality at the system level. They are located in the
@@ -262,6 +262,25 @@ lowest non-linear performance in `AMD CPPC Performance Capability
<perf_cap_>`_.)
This attribute is read-only.
+``energy_performance_available_preferences``
+
+A list of all the supported EPP preferences that can be used for
+``energy_performance_preference`` on this system.
+These profiles represent different hints that are provided to the low-level
+firmware about the user's desired performance vs. energy-efficiency
+trade-off. ``default`` means that the EPP value is set by the platform
+firmware. This attribute is read-only.
+
+``energy_performance_preference``
+
+The current energy performance preference can be read from this attribute,
+and the user can change it according to energy or performance needs.
+The list of all supported profiles can be read from the
+``energy_performance_available_preferences`` attribute. When the EPP feature
+is enabled by the platform firmware, the profiles map to integer values in
+the 0 to 255 range; if the EPP feature is disabled, the driver ignores the
+written value.
+This attribute is read-write.
+
Other performance and frequency values can be read back from
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
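
As an aside, not part of the patch: a minimal user-space sketch of driving the
per-policy attributes documented above. The sysfs paths and the profile string
come from the documentation; everything else is a plain, hypothetical example:

    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/devices/system/cpu/cpu0/cpufreq";
            char path[256], prefs[256];
            FILE *f;

            /* List the supported EPP profiles first. */
            snprintf(path, sizeof(path),
                     "%s/energy_performance_available_preferences", base);
            f = fopen(path, "r");
            if (!f)
                    return 1;
            if (fgets(prefs, sizeof(prefs), f))
                    printf("available: %s", prefs);
            fclose(f);

            /* Write one of the listed profiles back. */
            snprintf(path, sizeof(path),
                     "%s/energy_performance_preference", base);
            f = fopen(path, "w");
            if (!f)
                    return 1;
            fputs("balance_power", f);
            return fclose(f) ? 1 : 0;
    }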
@@ -280,8 +299,30 @@ module which supports the new AMD P-States mechanism on most of the future AMD
platforms. The AMD P-States mechanism is the more performance and energy
efficiency frequency management method on AMD processors.
-Kernel Module Options for ``amd-pstate``
-=========================================
+
+AMD Pstate Driver Operation Modes
+=================================
+
+``amd_pstate`` CPPC has two operation modes: CPPC autonomous (active) mode and
+CPPC non-autonomous (passive) mode.
+The active or passive mode can be chosen via different kernel parameters.
+When in autonomous mode, CPPC ignores requests done in the Desired Performance
+Target register and takes into account only the values set in the Minimum
+requested performance, Maximum requested performance, and Energy Performance
+Preference registers. When autonomous mode is disabled, it only considers the
+Desired Performance Target.
+
+Active Mode
+------------
+
+``amd_pstate=active``
+
+This is the low-level firmware control mode, implemented by the
+``amd_pstate_epp`` driver and selected by passing ``amd_pstate=active`` to the
+kernel on the command line.
+In this mode, the ``amd_pstate_epp`` driver provides a hint to the CPPC
+firmware indicating whether software wants to bias toward performance (0x0) or
+energy efficiency (0xff). The CPPC power algorithm then calculates the runtime
+workload and adjusts the real-time core frequency according to the power
+supply, thermal, core voltage and other hardware conditions.
Passive Mode
------------
@@ -298,6 +339,35 @@ processor must provide at least nominal performance requested and go higher if c
operating conditions allow.
+User Space Interface in ``sysfs`` - General
+===========================================
+
+Global Attributes
+-----------------
+
+``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
+control its functionality at the system level. They are located in the
+``/sys/devices/system/cpu/amd_pstate/`` directory and affect all CPUs.
+
+``status``
+ Operation mode of the driver: "active", "passive" or "disable".
+
+ "active"
+ The driver is functional and in the ``active mode``
+
+ "passive"
+ The driver is functional and in the ``passive mode``
+
+	"disable"
+		The driver is unregistered and not functional.
+
+	This attribute can be written to in order to change the driver's
+	operation mode or to unregister it. The string written to it must be
+	one of the possible values listed above and, if the write is
+	successful, the driver will switch over to the operation mode
+	represented by that string - or be unregistered in the "disable"
+	case.
+
``cpupower`` tool support for ``amd-pstate``
===============================================
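
As an aside, not part of the patch: switching the operation mode at run time
through the global ``status`` attribute can be done with something as small as
the sketch below. The path matches the "amd_pstate" kobject created by the
driver changes later in this diff:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/devices/system/cpu/amd_pstate/status", "w");

            if (!f)
                    return 1;
            /* One of "active", "passive" or "disable". */
            fputs("passive", f);
            return fclose(f) ? 1 : 0;
    }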
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
index 99e159bc5fb1..e4aa8c67d532 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
@@ -26,8 +26,13 @@ properties:
items:
- enum:
- qcom,qdu1000-cpufreq-epss
+ - qcom,sc7280-cpufreq-epss
+ - qcom,sc8280xp-cpufreq-epss
- qcom,sm6375-cpufreq-epss
- qcom,sm8250-cpufreq-epss
+ - qcom,sm8350-cpufreq-epss
+ - qcom,sm8450-cpufreq-epss
+ - qcom,sm8550-cpufreq-epss
- const: qcom,cpufreq-epss
reg:
diff --git a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
index 9c086eac6ca7..6f5e7904181f 100644
--- a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
@@ -17,6 +17,9 @@ description: |
on the CPU OPP in use. The CPUFreq driver sets the CPR power domain level
according to the required OPPs defined in the CPU OPP tables.
+  In the old implementation, the efuses are parsed to select the correct OPP
+  table and voltage; CPR is not supported/used there.
+
select:
properties:
compatible:
@@ -33,37 +36,65 @@ select:
required:
- compatible
-properties:
- cpus:
- type: object
-
- patternProperties:
- '^cpu@[0-9a-f]+$':
- type: object
-
- properties:
- power-domains:
- maxItems: 1
-
- power-domain-names:
- items:
- - const: cpr
-
- required:
- - power-domains
- - power-domain-names
-
patternProperties:
'^opp-table(-[a-z0-9]+)?$':
- if:
+ allOf:
+ - if:
+ properties:
+ compatible:
+ const: operating-points-v2-kryo-cpu
+ then:
+ $ref: /schemas/opp/opp-v2-kryo-cpu.yaml#
+
+ - if:
+ properties:
+ compatible:
+ const: operating-points-v2-qcom-level
+ then:
+ $ref: /schemas/opp/opp-v2-qcom-level.yaml#
+
+ unevaluatedProperties: false
+
+allOf:
+ - if:
properties:
compatible:
- const: operating-points-v2-kryo-cpu
+ contains:
+ enum:
+ - qcom,qcs404
+
then:
+ properties:
+ cpus:
+ type: object
+
+ patternProperties:
+ '^cpu@[0-9a-f]+$':
+ type: object
+
+ properties:
+ power-domains:
+ maxItems: 1
+
+ power-domain-names:
+ items:
+ - const: cpr
+
+ required:
+ - power-domains
+ - power-domain-names
+
patternProperties:
- '^opp-?[0-9]+$':
- required:
- - required-opps
+ '^opp-table(-[a-z0-9]+)?$':
+ if:
+ properties:
+ compatible:
+ const: operating-points-v2-kryo-cpu
+ then:
+ patternProperties:
+ '^opp-?[0-9]+$':
+ required:
+ - required-opps
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
index b4947b326773..bbbad31ae4ca 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
@@ -50,12 +50,22 @@ patternProperties:
opp-supported-hw:
description: |
A single 32 bit bitmap value, representing compatible HW.
- Bitmap:
+ Bitmap for MSM8996 format:
0: MSM8996, speedbin 0
1: MSM8996, speedbin 1
2: MSM8996, speedbin 2
- 3-31: unused
- maximum: 0x7
+ 3: MSM8996, speedbin 3
+ 4-31: unused
+
+          Bitmap for MSM8996SG format (speedbin shifted left by 4):
+ 0-3: unused
+ 4: MSM8996SG, speedbin 0
+ 5: MSM8996SG, speedbin 1
+ 6: MSM8996SG, speedbin 2
+ 7-31: unused
+ enum: [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x9, 0xd, 0xe, 0xf,
+ 0x10, 0x20, 0x30, 0x70]
clock-latency-ns: true
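
As an aside, not part of the patch: the two bitmap layouts above are a
one-bit-per-speedbin encoding, offset by 4 bits for MSM8996SG. A sketch of how
a matching value could be computed (the helper name is hypothetical; BIT()
comes from <linux/bits.h>):

    #include <linux/bits.h>
    #include <linux/types.h>

    /* MSM8996: speedbin N -> bit N; MSM8996SG: speedbin N -> bit N + 4.
     * E.g. MSM8996 speedbin 2 -> 0x4, MSM8996SG speedbin 1 -> 0x20. */
    static inline u32 kryo_supported_hw(unsigned int speedbin, bool is_8996sg)
    {
            return is_8996sg ? BIT(speedbin + 4) : BIT(speedbin);
    }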
diff --git a/arch/mips/include/asm/mach-loongson32/cpufreq.h b/arch/mips/include/asm/mach-loongson32/cpufreq.h
deleted file mode 100644
index e422a32883ae..000000000000
--- a/arch/mips/include/asm/mach-loongson32/cpufreq.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2014 Zhang, Keguang <[email protected]>
- *
- * Loongson 1 CPUFreq platform support.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_CPUFREQ_H
-#define __ASM_MACH_LOONGSON32_CPUFREQ_H
-
-struct plat_ls1x_cpufreq {
- const char *clk_name; /* CPU clk */
- const char *osc_clk_name; /* OSC clk */
- unsigned int max_freq; /* in kHz */
- unsigned int min_freq; /* in kHz */
-};
-
-#endif /* __ASM_MACH_LOONGSON32_CPUFREQ_H */
diff --git a/arch/mips/include/asm/mach-loongson32/platform.h b/arch/mips/include/asm/mach-loongson32/platform.h
index eb83e2741887..86e1a6aab4e5 100644
--- a/arch/mips/include/asm/mach-loongson32/platform.h
+++ b/arch/mips/include/asm/mach-loongson32/platform.h
@@ -12,7 +12,6 @@
#include <nand.h>
extern struct platform_device ls1x_uart_pdev;
-extern struct platform_device ls1x_cpufreq_pdev;
extern struct platform_device ls1x_eth0_pdev;
extern struct platform_device ls1x_eth1_pdev;
extern struct platform_device ls1x_ehci_pdev;
diff --git a/arch/mips/loongson32/common/platform.c b/arch/mips/loongson32/common/platform.c
index 311dc1580bbd..64d7979394e6 100644
--- a/arch/mips/loongson32/common/platform.c
+++ b/arch/mips/loongson32/common/platform.c
@@ -15,7 +15,6 @@
#include <platform.h>
#include <loongson1.h>
-#include <cpufreq.h>
#include <dma.h>
#include <nand.h>
@@ -62,21 +61,6 @@ void __init ls1x_serial_set_uartclk(struct platform_device *pdev)
p->uartclk = clk_get_rate(clk);
}
-/* CPUFreq */
-static struct plat_ls1x_cpufreq ls1x_cpufreq_pdata = {
- .clk_name = "cpu_clk",
- .osc_clk_name = "osc_clk",
- .max_freq = 266 * 1000,
- .min_freq = 33 * 1000,
-};
-
-struct platform_device ls1x_cpufreq_pdev = {
- .name = "ls1x-cpufreq",
- .dev = {
- .platform_data = &ls1x_cpufreq_pdata,
- },
-};
-
/* Synopsys Ethernet GMAC */
static struct stmmac_mdio_bus_data ls1x_mdio_bus_data = {
.phy_mask = 0,
diff --git a/arch/mips/loongson32/ls1b/board.c b/arch/mips/loongson32/ls1b/board.c
index 727e06718dab..fed8d432ef20 100644
--- a/arch/mips/loongson32/ls1b/board.c
+++ b/arch/mips/loongson32/ls1b/board.c
@@ -35,7 +35,6 @@ static const struct gpio_led_platform_data ls1x_led_pdata __initconst = {
static struct platform_device *ls1b_platform_devices[] __initdata = {
&ls1x_uart_pdev,
- &ls1x_cpufreq_pdev,
&ls1x_eth0_pdev,
&ls1x_eth1_pdev,
&ls1x_ehci_pdev,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 40d156a31676..00a831d56c50 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -721,6 +721,7 @@ void arch_cpu_idle(void)
{
x86_idle();
}
+EXPORT_SYMBOL_GPL(arch_cpu_idle);
/*
* We use this if we don't have any better idle routine..
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 0f17b1c32718..02d83c807271 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
}
/**
+ * cppc_get_epp_perf - Get the epp register value.
+ * @cpunum: CPU from which to get epp preference value.
+ * @epp_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
+{
+ return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_epp_perf);
+
+/**
* cppc_get_perf_caps - Get a CPU's performance capabilities.
* @cpunum: CPU from which to get capabilities info.
* @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h
@@ -1365,6 +1378,60 @@ out_err:
}
EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
+/**
+ * cppc_set_epp_perf - Set the Energy Performance Preference register value
+ * through the Performance Controls Interface.
+ * @cpu: CPU for which to set the EPP.
+ * @perf_ctrls: ptr to cppc_perf_ctrls. See cppc_acpi.h.
+ * @enable: Whether to enable autonomous performance level selection.
+ *
+ * Return: 0 for success, negative error code otherwise.
+ */
+int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
+{
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
+ struct cpc_register_resource *epp_set_reg;
+ struct cpc_register_resource *auto_sel_reg;
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
+ struct cppc_pcc_data *pcc_ss_data = NULL;
+ int ret;
+
+ if (!cpc_desc) {
+ pr_debug("No CPC descriptor for CPU:%d\n", cpu);
+ return -ENODEV;
+ }
+
+ auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE];
+ epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF];
+
+ if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) {
+ if (pcc_ss_id < 0) {
+ pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu);
+ return -ENODEV;
+ }
+
+ if (CPC_SUPPORTED(auto_sel_reg)) {
+ ret = cpc_write(cpu, auto_sel_reg, enable);
+ if (ret)
+ return ret;
+ }
+
+ if (CPC_SUPPORTED(epp_set_reg)) {
+ ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf);
+ if (ret)
+ return ret;
+ }
+
+ pcc_ss_data = pcc_data[pcc_ss_id];
+
+ down_write(&pcc_ss_data->pcc_lock);
+ /* after writing CPC, transfer the ownership of PCC to platform */
+ ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
+ up_write(&pcc_ss_data->pcc_lock);
+ } else {
+		ret = -ENOTSUPP;
+		pr_debug("EPP register access is only supported through PCC\n");
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cppc_set_epp_perf);
+
/**
* cppc_set_enable - Set to enable CPPC on the processor by writing the
* Continuous Performance Control package EnableRegister field.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 967bcf9d415e..6097644ebdc5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -220,13 +220,10 @@ static void genpd_debug_add(struct generic_pm_domain *genpd);
static void genpd_debug_remove(struct generic_pm_domain *genpd)
{
- struct dentry *d;
-
if (!genpd_debugfs_dir)
return;
- d = debugfs_lookup(genpd->name, genpd_debugfs_dir);
- debugfs_remove(d);
+ debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir);
}
static void genpd_update_accounting(struct generic_pm_domain *genpd)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 50e726b6c2cf..b29be7d4d7d0 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1864,6 +1864,10 @@ static bool pm_runtime_need_not_resume(struct device *dev)
* sure the device is put into low power state and it should only be used during
* system-wide PM transitions to sleep states. It assumes that the analogous
* pm_runtime_force_resume() will be used to resume the device.
+ *
+ * Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent
+ * state where this function has called the ->runtime_suspend callback but the
+ * PM core marks the driver as runtime active.
*/
int pm_runtime_force_suspend(struct device *dev)
{
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2a84fc63371e..76aa1336e2be 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -3,7 +3,6 @@ menu "CPU Frequency scaling"
config CPU_FREQ
bool "CPU Frequency scaling"
- select SRCU
help
CPU Frequency scaling allows you to change the clock speed of
CPUs on the fly. This is a nice method to save power, because
@@ -271,15 +270,6 @@ config LOONGSON2_CPUFREQ
Loongson2F and its successors support this feature.
If in doubt, say N.
-
-config LOONGSON1_CPUFREQ
- tristate "Loongson1 CPUFreq Driver"
- depends on LOONGSON1_LS1B
- help
- This option adds a CPUFreq driver for loongson1 processors which
- support software configurable cpu frequency.
-
- If in doubt, say N.
endif
if SPARC64
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 32a7029e25ed..4a806cc5265b 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -111,7 +111,6 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o
obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o
obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o
obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o
-obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o
obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o
obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o
obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index c17bd845f5fc..45c88894fd8e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -59,8 +59,171 @@
* we disable it by default to go acpi-cpufreq on these processors and add a
* module parameter to be able to enable it manually for debugging.
*/
+static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
-static int cppc_load __initdata;
+static struct cpufreq_driver amd_pstate_epp_driver;
+static int cppc_state = AMD_PSTATE_DISABLE;
+struct kobject *amd_pstate_kobj;
+
+/*
+ * AMD Energy Preference Performance (EPP)
+ * The EPP is used in the CCLK DPM controller to drive
+ * the frequency that a core is going to operate during
+ * short periods of activity. EPP values will be utilized for
+ * different OS profiles (balanced, performance, power savings)
+ * display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ * index String
+ *-------------------------------------
+ * 0 default
+ * 1 performance
+ * 2 balance_performance
+ * 3 balance_power
+ * 4 power
+ */
+enum energy_perf_value_index {
+ EPP_INDEX_DEFAULT = 0,
+ EPP_INDEX_PERFORMANCE,
+ EPP_INDEX_BALANCE_PERFORMANCE,
+ EPP_INDEX_BALANCE_POWERSAVE,
+ EPP_INDEX_POWERSAVE,
+};
+
+static const char * const energy_perf_strings[] = {
+ [EPP_INDEX_DEFAULT] = "default",
+ [EPP_INDEX_PERFORMANCE] = "performance",
+ [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
+ [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
+ [EPP_INDEX_POWERSAVE] = "power",
+ NULL
+};
+
+static unsigned int epp_values[] = {
+ [EPP_INDEX_DEFAULT] = 0,
+ [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
+ [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
+ [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
+ [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
+};
+
+static inline int get_mode_idx_from_str(const char *str, size_t size)
+{
+ int i;
+
+	for (i = 0; i < AMD_PSTATE_MAX; i++) {
+ if (!strncmp(str, amd_pstate_mode_string[i], size))
+ return i;
+ }
+ return -EINVAL;
+}
+
+static DEFINE_MUTEX(amd_pstate_limits_lock);
+static DEFINE_MUTEX(amd_pstate_driver_lock);
+
+static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+{
+ u64 epp;
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ if (!cppc_req_cached) {
+ epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
+ &cppc_req_cached);
+ if (epp)
+ return epp;
+ }
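+		/* The EPP hint lives in bits 31:24 of the request MSR value */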
+ epp = (cppc_req_cached >> 24) & 0xFF;
+ } else {
+ ret = cppc_get_epp_perf(cpudata->cpu, &epp);
+ if (ret < 0) {
+ pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+ return -EIO;
+ }
+ }
+
+ return (s16)(epp & 0xff);
+}
+
+static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = amd_pstate_get_epp(cpudata, 0);
+ if (epp < 0)
+ return epp;
+
+ switch (epp) {
+ case AMD_CPPC_EPP_PERFORMANCE:
+ index = EPP_INDEX_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+ index = EPP_INDEX_BALANCE_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+ index = EPP_INDEX_BALANCE_POWERSAVE;
+ break;
+ case AMD_CPPC_EPP_POWERSAVE:
+ index = EPP_INDEX_POWERSAVE;
+ break;
+ default:
+ break;
+ }
+
+ return index;
+}
+
+static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+ int ret;
+ struct cppc_perf_ctrls perf_ctrls;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 value = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ if (!ret)
+ cpudata->epp_cached = epp;
+ } else {
+ perf_ctrls.energy_perf = epp;
+ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+ if (ret) {
+ pr_debug("failed to set energy perf value (%d)\n", ret);
+ return ret;
+ }
+ cpudata->epp_cached = epp;
+ }
+
+ return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index) {
+ pr_debug("EPP pref_index is invalid\n");
+ return -EINVAL;
+ }
+
+ if (epp == -EINVAL)
+ epp = epp_values[pref_index];
+
+ if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ pr_debug("EPP cannot be set under performance policy\n");
+ return -EBUSY;
+ }
+
+ ret = amd_pstate_set_epp(cpudata, epp);
+
+ return ret;
+}
static inline int pstate_enable(bool enable)
{
@@ -70,11 +233,21 @@ static inline int pstate_enable(bool enable)
static int cppc_enable(bool enable)
{
int cpu, ret = 0;
+ struct cppc_perf_ctrls perf_ctrls;
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
return ret;
+
+ /* Enable autonomous mode for EPP */
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ /* Set desired perf as zero to allow EPP firmware control */
+ perf_ctrls.desired_perf = 0;
+ ret = cppc_set_perf(cpu, &perf_ctrls);
+ if (ret)
+ return ret;
+ }
}
return ret;
@@ -418,7 +591,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
return;
cpudata->boost_supported = true;
- amd_pstate_driver.boost_enabled = true;
+ current_pstate_driver->boost_enabled = true;
}
static void amd_perf_ctl_reset(unsigned int cpu)
@@ -501,6 +674,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
policy->driver_data = cpudata;
amd_pstate_boost_init(cpudata);
+ if (!current_pstate_driver->adjust_perf)
+ current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
return 0;
@@ -561,7 +736,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
if (max_freq < 0)
return max_freq;
- return sprintf(&buf[0], "%u\n", max_freq);
+ return sysfs_emit(buf, "%u\n", max_freq);
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -574,7 +749,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli
if (freq < 0)
return freq;
- return sprintf(&buf[0], "%u\n", freq);
+ return sysfs_emit(buf, "%u\n", freq);
}
/*
@@ -589,13 +764,151 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
perf = READ_ONCE(cpudata->highest_perf);
- return sprintf(&buf[0], "%u\n", perf);
+ return sysfs_emit(buf, "%u\n", perf);
+}
+
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int offset = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
+
+	offset += sysfs_emit_at(buf, offset, "\n");
+
+ return offset;
+}
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ char str_preference[21];
+ ssize_t ret;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = match_string(energy_perf_strings, -1, str_preference);
+ if (ret < 0)
+ return -EINVAL;
+
+ mutex_lock(&amd_pstate_limits_lock);
+ ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+ mutex_unlock(&amd_pstate_limits_lock);
+
+ return ret ?: count;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int preference;
+
+ preference = amd_pstate_get_energy_pref_index(cpudata);
+ if (preference < 0)
+ return preference;
+
+ return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+static ssize_t amd_pstate_show_status(char *buf)
+{
+ if (!current_pstate_driver)
+ return sysfs_emit(buf, "disable\n");
+
+ return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
+}
+
+static void amd_pstate_driver_cleanup(void)
+{
+ current_pstate_driver = NULL;
+}
+
+static int amd_pstate_update_status(const char *buf, size_t size)
+{
+ int ret = 0;
+ int mode_idx;
+
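+	/* Valid strings are "active" (6 chars), "passive" and "disable" (7 chars) */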
+ if (size > 7 || size < 6)
+ return -EINVAL;
+ mode_idx = get_mode_idx_from_str(buf, size);
+
+	switch (mode_idx) {
+ case AMD_PSTATE_DISABLE:
+ if (!current_pstate_driver)
+ return -EINVAL;
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ return -EBUSY;
+ cpufreq_unregister_driver(current_pstate_driver);
+ amd_pstate_driver_cleanup();
+ break;
+ case AMD_PSTATE_PASSIVE:
+ if (current_pstate_driver) {
+ if (current_pstate_driver == &amd_pstate_driver)
+ return 0;
+ cpufreq_unregister_driver(current_pstate_driver);
+ cppc_state = AMD_PSTATE_PASSIVE;
+ current_pstate_driver = &amd_pstate_driver;
+ }
+
+ ret = cpufreq_register_driver(current_pstate_driver);
+ break;
+ case AMD_PSTATE_ACTIVE:
+ if (current_pstate_driver) {
+ if (current_pstate_driver == &amd_pstate_epp_driver)
+ return 0;
+ cpufreq_unregister_driver(current_pstate_driver);
+ current_pstate_driver = &amd_pstate_epp_driver;
+ cppc_state = AMD_PSTATE_ACTIVE;
+ }
+
+ ret = cpufreq_register_driver(current_pstate_driver);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static ssize_t show_status(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t ret;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ ret = amd_pstate_show_status(buf);
+ mutex_unlock(&amd_pstate_driver_lock);
+
+ return ret;
+}
+
+static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+ const char *buf, size_t count)
+{
+ char *p = memchr(buf, '\n', count);
+ int ret;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ ret = amd_pstate_update_status(buf, p ? p - buf : count);
+ mutex_unlock(&amd_pstate_driver_lock);
+
+ return ret < 0 ? ret : count;
}
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+define_one_global_rw(status);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
@@ -604,6 +917,313 @@ static struct freq_attr *amd_pstate_attr[] = {
NULL,
};
+static struct freq_attr *amd_pstate_epp_attr[] = {
+ &amd_pstate_max_freq,
+ &amd_pstate_lowest_nonlinear_freq,
+ &amd_pstate_highest_perf,
+ &energy_performance_preference,
+ &energy_performance_available_preferences,
+ NULL,
+};
+
+static struct attribute *pstate_global_attributes[] = {
+ &status.attr,
+ NULL
+};
+
+static const struct attribute_group amd_pstate_global_attr_group = {
+ .attrs = pstate_global_attributes,
+};
+
+static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+{
+ int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+ struct amd_cpudata *cpudata;
+ struct device *dev;
+ u64 value;
+
+ /*
+ * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
+ * which is ideal for initialization process.
+ */
+ amd_perf_ctl_reset(policy->cpu);
+ dev = get_cpu_device(policy->cpu);
+ if (!dev)
+ return -ENODEV;
+
+ cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
+ if (!cpudata)
+ return -ENOMEM;
+
+ cpudata->cpu = policy->cpu;
+ cpudata->epp_policy = 0;
+
+ ret = amd_pstate_init_perf(cpudata);
+ if (ret)
+ goto free_cpudata1;
+
+ min_freq = amd_get_min_freq(cpudata);
+ max_freq = amd_get_max_freq(cpudata);
+ nominal_freq = amd_get_nominal_freq(cpudata);
+ lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+ if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+ dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+ min_freq, max_freq);
+ ret = -EINVAL;
+ goto free_cpudata1;
+ }
+
+ policy->cpuinfo.min_freq = min_freq;
+ policy->cpuinfo.max_freq = max_freq;
+ /* It will be updated by governor */
+ policy->cur = policy->cpuinfo.min_freq;
+
+ /* Initial processor data capability frequencies */
+ cpudata->max_freq = max_freq;
+ cpudata->min_freq = min_freq;
+ cpudata->nominal_freq = nominal_freq;
+ cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+ policy->driver_data = cpudata;
+
+ cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
+
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
+
+ /*
+ * Set the policy to powersave to provide a valid fallback value in case
+ * the default cpufreq governor is neither powersave nor performance.
+ */
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ policy->fast_switch_possible = true;
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+ if (ret)
+			goto free_cpudata1;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
+ if (ret)
+			goto free_cpudata1;
+ WRITE_ONCE(cpudata->cppc_cap1_cached, value);
+ }
+ amd_pstate_boost_init(cpudata);
+
+ return 0;
+
+free_cpudata1:
+ kfree(cpudata);
+ return ret;
+}
+
+static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+{
+ pr_debug("CPU %d exiting\n", policy->cpu);
+ policy->fast_switch_possible = false;
+ return 0;
+}
+
+static void amd_pstate_epp_init(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u32 max_perf, min_perf;
+ u64 value;
+ s16 epp;
+
+ max_perf = READ_ONCE(cpudata->highest_perf);
+ min_perf = READ_ONCE(cpudata->lowest_perf);
+
+ value = READ_ONCE(cpudata->cppc_req_cached);
+
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ min_perf = max_perf;
+
+ /* Initial min/max values for CPPC Performance Controls Register */
+ value &= ~AMD_CPPC_MIN_PERF(~0L);
+ value |= AMD_CPPC_MIN_PERF(min_perf);
+
+ value &= ~AMD_CPPC_MAX_PERF(~0L);
+ value |= AMD_CPPC_MAX_PERF(max_perf);
+
+	/* The CPPC EPP feature requires the desired perf field to be set to zero */
+ value &= ~AMD_CPPC_DES_PERF(~0L);
+ value |= AMD_CPPC_DES_PERF(0);
+
+ if (cpudata->epp_policy == cpudata->policy)
+ goto skip_epp;
+
+ cpudata->epp_policy = cpudata->policy;
+
+ /* Get BIOS pre-defined epp value */
+ epp = amd_pstate_get_epp(cpudata, value);
+ if (epp < 0) {
+		/*
+		 * This return value can only be negative for shared-memory
+		 * systems where EPP register read/write is not supported.
+		 */
+ goto skip_epp;
+ }
+
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ epp = 0;
+
+ /* Set initial EPP value */
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ }
+
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+ amd_pstate_set_epp(cpudata, epp);
+skip_epp:
+ cpufreq_cpu_put(policy);
+}
+
+static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (!policy->cpuinfo.max_freq)
+ return -ENODEV;
+
+ pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
+ policy->cpuinfo.max_freq, policy->max);
+
+ cpudata->policy = policy->policy;
+
+ amd_pstate_epp_init(policy->cpu);
+
+ return 0;
+}
+
+static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+{
+ struct cppc_perf_ctrls perf_ctrls;
+ u64 value, max_perf;
+ int ret;
+
+ ret = amd_pstate_enable(true);
+ if (ret)
+ pr_err("failed to enable amd pstate during resume, return %d\n", ret);
+
+ value = READ_ONCE(cpudata->cppc_req_cached);
+ max_perf = READ_ONCE(cpudata->highest_perf);
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ } else {
+ perf_ctrls.max_perf = max_perf;
+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
+ cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ }
+}
+
+static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
+
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ amd_pstate_epp_reenable(cpudata);
+ cpudata->suspended = false;
+ }
+
+ return 0;
+}
+
+static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ struct cppc_perf_ctrls perf_ctrls;
+ int min_perf;
+ u64 value;
+
+ min_perf = READ_ONCE(cpudata->lowest_perf);
+ value = READ_ONCE(cpudata->cppc_req_cached);
+
+ mutex_lock(&amd_pstate_limits_lock);
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
+
+ /* Set max perf same as min perf */
+ value &= ~AMD_CPPC_MAX_PERF(~0L);
+ value |= AMD_CPPC_MAX_PERF(min_perf);
+ value &= ~AMD_CPPC_MIN_PERF(~0L);
+ value |= AMD_CPPC_MIN_PERF(min_perf);
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ } else {
+ perf_ctrls.desired_perf = 0;
+ perf_ctrls.max_perf = min_perf;
+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
+ cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ }
+ mutex_unlock(&amd_pstate_limits_lock);
+}
+
+static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
+
+ if (cpudata->suspended)
+ return 0;
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ amd_pstate_epp_offline(policy);
+
+ return 0;
+}
+
+static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+{
+ cpufreq_verify_within_cpu_limits(policy);
+	pr_debug("policy_max=%d, policy_min=%d\n", policy->max, policy->min);
+ return 0;
+}
+
+static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int ret;
+
+ /* avoid suspending when EPP is not enabled */
+ if (cppc_state != AMD_PSTATE_ACTIVE)
+ return 0;
+
+	/* set this flag to avoid setting core offline */
+ cpudata->suspended = true;
+
+ /* disable CPPC in lowlevel firmware */
+ ret = amd_pstate_enable(false);
+ if (ret)
+ pr_err("failed to suspend, return %d\n", ret);
+
+ return 0;
+}
+
+static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (cpudata->suspended) {
+ mutex_lock(&amd_pstate_limits_lock);
+
+		/* enable amd pstate from suspend state */
+ amd_pstate_epp_reenable(cpudata);
+
+ mutex_unlock(&amd_pstate_limits_lock);
+
+ cpudata->suspended = false;
+ }
+
+ return 0;
+}
+
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
@@ -617,6 +1237,20 @@ static struct cpufreq_driver amd_pstate_driver = {
.attr = amd_pstate_attr,
};
+static struct cpufreq_driver amd_pstate_epp_driver = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = amd_pstate_epp_verify_policy,
+ .setpolicy = amd_pstate_epp_set_policy,
+ .init = amd_pstate_epp_cpu_init,
+ .exit = amd_pstate_epp_cpu_exit,
+ .offline = amd_pstate_epp_cpu_offline,
+ .online = amd_pstate_epp_cpu_online,
+ .suspend = amd_pstate_epp_suspend,
+ .resume = amd_pstate_epp_resume,
+ .name = "amd_pstate_epp",
+ .attr = amd_pstate_epp_attr,
+};
+
static int __init amd_pstate_init(void)
{
int ret;
@@ -626,10 +1260,10 @@ static int __init amd_pstate_init(void)
/*
* by default the pstate driver is disabled to load
* enable the amd_pstate passive mode driver explicitly
- * with amd_pstate=passive in kernel command line
+	 * with amd_pstate=passive or other modes on the kernel command line
*/
- if (!cppc_load) {
- pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
+ if (cppc_state == AMD_PSTATE_DISABLE) {
+		pr_debug("driver load is disabled, boot with a specific mode to enable this\n");
return -ENODEV;
}
@@ -645,7 +1279,8 @@ static int __init amd_pstate_init(void)
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
- amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, cppc_enable);
@@ -656,31 +1291,63 @@ static int __init amd_pstate_init(void)
/* enable amd pstate feature */
ret = amd_pstate_enable(true);
if (ret) {
- pr_err("failed to enable amd-pstate with return %d\n", ret);
+ pr_err("failed to enable with return %d\n", ret);
return ret;
}
- ret = cpufreq_register_driver(&amd_pstate_driver);
+ ret = cpufreq_register_driver(current_pstate_driver);
if (ret)
- pr_err("failed to register amd_pstate_driver with return %d\n",
- ret);
+ pr_err("failed to register with return %d\n", ret);
+
+ amd_pstate_kobj = kobject_create_and_add("amd_pstate", &cpu_subsys.dev_root->kobj);
+ if (!amd_pstate_kobj) {
+ ret = -EINVAL;
+ pr_err("global sysfs registration failed.\n");
+ goto kobject_free;
+ }
+
+ ret = sysfs_create_group(amd_pstate_kobj, &amd_pstate_global_attr_group);
+ if (ret) {
+ pr_err("sysfs attribute export failed with error %d.\n", ret);
+ goto global_attr_free;
+ }
return ret;
+
+global_attr_free:
+ kobject_put(amd_pstate_kobj);
+kobject_free:
+ cpufreq_unregister_driver(current_pstate_driver);
+ return ret;
}
device_initcall(amd_pstate_init);
static int __init amd_pstate_param(char *str)
{
+ size_t size;
+ int mode_idx;
+
if (!str)
return -EINVAL;
- if (!strcmp(str, "disable")) {
- cppc_load = 0;
- pr_info("driver is explicitly disabled\n");
- } else if (!strcmp(str, "passive"))
- cppc_load = 1;
+ size = strlen(str);
+ mode_idx = get_mode_idx_from_str(str, size);
- return 0;
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
}
early_param("amd_pstate", amd_pstate_param);
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 4153150e20db..ffea6402189d 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -751,10 +751,7 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
{
- int ret;
-
- ret = cpufreq_unregister_driver(&brcm_avs_driver);
- WARN_ON(ret);
+ cpufreq_unregister_driver(&brcm_avs_driver);
brcm_avs_prepare_uninit(pdev);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7e56a42750ea..6d8fd3b8dcb5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -993,7 +993,7 @@ static const struct sysfs_ops sysfs_ops = {
.store = store,
};
-static struct kobj_type ktype_cpufreq = {
+static const struct kobj_type ktype_cpufreq = {
.sysfs_ops = &sysfs_ops,
.default_groups = cpufreq_groups,
.release = cpufreq_sysfs_release,
@@ -2904,12 +2904,12 @@ EXPORT_SYMBOL_GPL(cpufreq_register_driver);
* Returns zero if successful, and -EINVAL if the cpufreq_driver is
* currently not initialised.
*/
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+void cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
unsigned long flags;
- if (!cpufreq_driver || (driver != cpufreq_driver))
- return -EINVAL;
+ if (WARN_ON(!cpufreq_driver || (driver != cpufreq_driver)))
+ return;
pr_debug("unregistering driver %s\n", driver->name);
@@ -2926,8 +2926,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpus_read_unlock();
-
- return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index 9e97f60f8199..ebb3a8102681 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -133,12 +133,14 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev)
static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
{
+ cpufreq_unregister_driver(&davinci_driver);
+
clk_put(cpufreq.armclk);
if (cpufreq.asyncclk)
clk_put(cpufreq.asyncclk);
- return cpufreq_unregister_driver(&davinci_driver);
+ return 0;
}
static struct platform_driver davinci_cpufreq_driver = {
diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c
deleted file mode 100644
index fb72d709db56..000000000000
--- a/drivers/cpufreq/loongson1-cpufreq.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * CPU Frequency Scaling for Loongson 1 SoC
- *
- * Copyright (C) 2014-2016 Zhang, Keguang <[email protected]>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/cpu.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include <cpufreq.h>
-#include <loongson1.h>
-
-struct ls1x_cpufreq {
- struct device *dev;
- struct clk *clk; /* CPU clk */
- struct clk *mux_clk; /* MUX of CPU clk */
- struct clk *pll_clk; /* PLL clk */
- struct clk *osc_clk; /* OSC clk */
- unsigned int max_freq;
- unsigned int min_freq;
-};
-
-static struct ls1x_cpufreq *cpufreq;
-
-static int ls1x_cpufreq_notifier(struct notifier_block *nb,
- unsigned long val, void *data)
-{
- if (val == CPUFREQ_POSTCHANGE)
- current_cpu_data.udelay_val = loops_per_jiffy;
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block ls1x_cpufreq_notifier_block = {
- .notifier_call = ls1x_cpufreq_notifier
-};
-
-static int ls1x_cpufreq_target(struct cpufreq_policy *policy,
- unsigned int index)
-{
- struct device *cpu_dev = get_cpu_device(policy->cpu);
- unsigned int old_freq, new_freq;
-
- old_freq = policy->cur;
- new_freq = policy->freq_table[index].frequency;
-
- /*
- * The procedure of reconfiguring CPU clk is as below.
- *
- * - Reparent CPU clk to OSC clk
- * - Reset CPU clock (very important)
- * - Reconfigure CPU DIV
- * - Reparent CPU clk back to CPU DIV clk
- */
-
- clk_set_parent(policy->clk, cpufreq->osc_clk);
- __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU,
- LS1X_CLK_PLL_DIV);
- __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU),
- LS1X_CLK_PLL_DIV);
- clk_set_rate(cpufreq->mux_clk, new_freq * 1000);
- clk_set_parent(policy->clk, cpufreq->mux_clk);
- dev_dbg(cpu_dev, "%u KHz --> %u KHz\n", old_freq, new_freq);
-
- return 0;
-}
-
-static int ls1x_cpufreq_init(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev = get_cpu_device(policy->cpu);
- struct cpufreq_frequency_table *freq_tbl;
- unsigned int pll_freq, freq;
- int steps, i;
-
- pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000;
-
- steps = 1 << DIV_CPU_WIDTH;
- freq_tbl = kcalloc(steps, sizeof(*freq_tbl), GFP_KERNEL);
- if (!freq_tbl)
- return -ENOMEM;
-
- for (i = 0; i < (steps - 1); i++) {
- freq = pll_freq / (i + 1);
- if ((freq < cpufreq->min_freq) || (freq > cpufreq->max_freq))
- freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID;
- else
- freq_tbl[i].frequency = freq;
- dev_dbg(cpu_dev,
- "cpufreq table: index %d: frequency %d\n", i,
- freq_tbl[i].frequency);
- }
- freq_tbl[i].frequency = CPUFREQ_TABLE_END;
-
- policy->clk = cpufreq->clk;
- cpufreq_generic_init(policy, freq_tbl, 0);
-
- return 0;
-}
-
-static int ls1x_cpufreq_exit(struct cpufreq_policy *policy)
-{
- kfree(policy->freq_table);
- return 0;
-}
-
-static struct cpufreq_driver ls1x_cpufreq_driver = {
- .name = "cpufreq-ls1x",
- .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
- .verify = cpufreq_generic_frequency_table_verify,
- .target_index = ls1x_cpufreq_target,
- .get = cpufreq_generic_get,
- .init = ls1x_cpufreq_init,
- .exit = ls1x_cpufreq_exit,
- .attr = cpufreq_generic_attr,
-};
-
-static int ls1x_cpufreq_remove(struct platform_device *pdev)
-{
- cpufreq_unregister_notifier(&ls1x_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- cpufreq_unregister_driver(&ls1x_cpufreq_driver);
-
- return 0;
-}
-
-static int ls1x_cpufreq_probe(struct platform_device *pdev)
-{
- struct plat_ls1x_cpufreq *pdata = dev_get_platdata(&pdev->dev);
- struct clk *clk;
- int ret;
-
- if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) {
- dev_err(&pdev->dev, "platform data missing\n");
- return -EINVAL;
- }
-
- cpufreq =
- devm_kzalloc(&pdev->dev, sizeof(struct ls1x_cpufreq), GFP_KERNEL);
- if (!cpufreq)
- return -ENOMEM;
-
- cpufreq->dev = &pdev->dev;
-
- clk = devm_clk_get(&pdev->dev, pdata->clk_name);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get %s clock\n",
- pdata->clk_name);
- return PTR_ERR(clk);
- }
- cpufreq->clk = clk;
-
- clk = clk_get_parent(clk);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get parent of %s clock\n",
- __clk_get_name(cpufreq->clk));
- return PTR_ERR(clk);
- }
- cpufreq->mux_clk = clk;
-
- clk = clk_get_parent(clk);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get parent of %s clock\n",
- __clk_get_name(cpufreq->mux_clk));
- return PTR_ERR(clk);
- }
- cpufreq->pll_clk = clk;
-
- clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get %s clock\n",
- pdata->osc_clk_name);
- return PTR_ERR(clk);
- }
- cpufreq->osc_clk = clk;
-
- cpufreq->max_freq = pdata->max_freq;
- cpufreq->min_freq = pdata->min_freq;
-
- ret = cpufreq_register_driver(&ls1x_cpufreq_driver);
- if (ret) {
- dev_err(&pdev->dev,
- "failed to register CPUFreq driver: %d\n", ret);
- return ret;
- }
-
- ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-
- if (ret) {
- dev_err(&pdev->dev,
- "failed to register CPUFreq notifier: %d\n",ret);
- cpufreq_unregister_driver(&ls1x_cpufreq_driver);
- }
-
- return ret;
-}
-
-static struct platform_driver ls1x_cpufreq_platdrv = {
- .probe = ls1x_cpufreq_probe,
- .remove = ls1x_cpufreq_remove,
- .driver = {
- .name = "ls1x-cpufreq",
- },
-};
-
-module_platform_driver(ls1x_cpufreq_platdrv);
-
-MODULE_ALIAS("platform:ls1x-cpufreq");
-MODULE_AUTHOR("Kelvin Cheung <[email protected]>");
-MODULE_DESCRIPTION("Loongson1 CPUFreq driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index f80339779084..b22f5cc8a463 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -317,13 +317,16 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
+ cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
+
+ return 0;
}
static const struct of_device_id mtk_cpufreq_hw_match[] = {
{ .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match);
static struct platform_driver mtk_cpufreq_hw_driver = {
.probe = mtk_cpufreq_hw_driver_probe,
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 1b50df06c6bc..81649a1969b6 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -184,7 +184,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
static int omap_cpufreq_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&omap_driver);
+ cpufreq_unregister_driver(&omap_driver);
+
+ return 0;
}
static struct platform_driver omap_cpufreq_platdrv = {
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index 957cf6bb8c05..2f581d2d617d 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -706,6 +706,8 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
return -ENOMEM;
qcom_cpufreq.soc_data = of_device_get_match_data(dev);
+ if (!qcom_cpufreq.soc_data)
+ return -ENODEV;
clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL);
if (!clk_data)
@@ -768,7 +770,9 @@ of_exit:
static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
+ cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
+
+ return 0;
}
static struct platform_driver qcom_cpufreq_hw_driver = {
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 4596c3e323aa..5890e25d7f77 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -411,7 +411,8 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
static struct cpufreq_driver tegra194_cpufreq_driver = {
.name = "tegra194",
- .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+ .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
+ CPUFREQ_IS_COOLING_DEV,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = tegra194_cpufreq_set_target,
.get = tegra194_get_speed,
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index ff71dd662880..cac5997dca50 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -74,6 +74,7 @@ endmenu
config HALTPOLL_CPUIDLE
tristate "Halt poll cpuidle driver"
depends on X86 && KVM_GUEST
+ select CPU_IDLE_GOV_HALTPOLL
default y
help
This option enables halt poll cpuidle driver, which allows to poll
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 747aa537389b..a1ee475d180d 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -24,6 +24,14 @@ config ARM_PSCI_CPUIDLE
It provides an idle driver that is capable of detecting and
managing idle states through the PSCI firmware interface.
+ The driver has limitations when used with PREEMPT_RT:
+ - If the idle states are described with the non-hierarchical layout,
+ all idle states are still available.
+
+ - If the idle states are described with the hierarchical layout,
+ only the idle states defined per CPU are available, but not the ones
+ being shared among a group of CPUs (aka cluster idle states).
+
config ARM_PSCI_CPUIDLE_DOMAIN
bool "PSCI CPU idle Domain"
depends on ARM_PSCI_CPUIDLE
@@ -102,6 +110,7 @@ config ARM_MVEBU_V7_CPUIDLE
config ARM_TEGRA_CPUIDLE
bool "CPU Idle Driver for NVIDIA Tegra SoCs"
depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
select ARM_CPU_SUSPEND
help
@@ -110,6 +119,7 @@ config ARM_TEGRA_CPUIDLE
config ARM_QCOM_SPM_CPUIDLE
bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARM_CPU_SUSPEND
select CPU_IDLE_MULTIPLE_DRIVERS
select DT_IDLE_STATES
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 3a39a7f48b77..e66df22f9695 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
local_irq_enable();
return index;
}
- default_idle();
+ arch_cpu_idle();
return index;
}
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index c80cf9ddabd8..6ad2954948a5 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -64,8 +64,11 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
- /* Allow power off when OSI has been successfully enabled. */
- if (use_osi)
+ /*
+ * Allow power off when OSI has been successfully enabled.
+ * PREEMPT_RT is not yet ready to enter domain idle states.
+ */
+ if (use_osi && !IS_ENABLED(CONFIG_PREEMPT_RT))
pd->power_off = psci_pd_power_off;
else
pd->flags |= GENPD_FLAG_ALWAYS_ON;
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 57bc3e3ae391..68467a325dcb 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -231,6 +231,9 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
if (!psci_has_osi_support())
return 0;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return 0;
+
data->dev = psci_dt_attach_cpu(cpu);
if (IS_ERR_OR_NULL(data->dev))
return PTR_ERR_OR_ZERO(data->dev);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index f70aa17e2a8e..d9cda7f6ccb9 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -183,11 +183,15 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
else if (s->target_residency_ns < 0)
s->target_residency_ns = 0;
+ else
+ s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC);
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
else if (s->exit_latency_ns < 0)
s->exit_latency_ns = 0;
+ else
+ s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
}
}
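
As an aside, not part of the patch: after this change the microsecond and
nanosecond fields of an idle state are kept consistent in both directions, so
a driver that fills in only the nanosecond value gets the legacy microsecond
field derived for it. A sketch, assuming NSEC_PER_USEC from <linux/time64.h>:

    /* Only the ns field is provided by the driver... */
    struct cpuidle_state s = {
            .target_residency_ns = 200 * NSEC_PER_USEC,
    };
    /* ...after __cpuidle_driver_init(), s.target_residency == 200 (us). */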
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index d9262db79cae..987fc5f3997d 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,8 +2,13 @@
/*
* Timer events oriented CPU idle governor
*
+ * TEO governor:
* Copyright (C) 2018 - 2021 Intel Corporation
* Author: Rafael J. Wysocki <[email protected]>
+ *
+ * Util-awareness mechanism:
+ * Copyright (C) 2022 Arm Ltd.
+ * Author: Kajetan Puchalski <[email protected]>
*/
/**
@@ -99,15 +104,56 @@
* select the given idle state instead of the candidate one.
*
* 3. By default, select the candidate state.
+ *
+ * Util-awareness mechanism:
+ *
+ * The idea behind the util-awareness extension is that there are two distinct
+ * scenarios for the CPU which should result in two different approaches to idle
+ * state selection - utilized and not utilized.
+ *
+ * In this case, 'utilized' means that the average runqueue util of the CPU is
+ * above a certain threshold.
+ *
+ * When the CPU is utilized while going into idle, more likely than not it will
+ * be woken up to do more work soon and so a shallower idle state should be
+ * selected to minimise latency and maximise performance. When the CPU is not
+ * being utilized, the usual metrics-based approach to selecting the deepest
+ * available idle state should be preferred to take advantage of the power
+ * saving.
+ *
+ * In order to achieve this, the governor uses a utilization threshold.
+ * The threshold is computed per-CPU as a percentage of the CPU's capacity
+ * by bit shifting the capacity value. Based on testing, a shift of 6 (~1.56%)
+ * seems to give the best results.
+ *
+ * Before selecting the next idle state, the governor compares the current CPU
+ * util to the precomputed util threshold. If it's below, it defaults to the
+ * TEO metrics mechanism. If it's above, the closest shallower idle state will
+ * be selected instead, as long as it is not a polling state.
*/
#include <linux/cpuidle.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/topology.h>
#include <linux/tick.h>
/*
+ * The number of bits to shift the CPU's capacity by in order to determine
+ * the utilized threshold.
+ *
+ * 6 was chosen based on testing as the number that achieved the best balance
+ * of power and performance on average.
+ *
+ * The resulting threshold is high enough to not be triggered by background
+ * noise and low enough to react quickly when activity starts to ramp up.
+ */
+#define UTIL_THRESHOLD_SHIFT 6
+
+/*
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
* is used for decreasing metrics on a regular basis.
*/
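
To make the "shift of 6 (~1.56%)" concrete: on a system where the biggest CPU capacity is the usual SCHED_CAPACITY_SCALE of 1024, the per-CPU threshold computed at enable time works out as follows (a worked example, not driver code):

    /* 1024 >> 6 = 16, and 16/1024 is roughly 1.56% of capacity. */
    unsigned long capacity = 1024;  /* arch_scale_cpu_capacity(cpu) */
    unsigned long util_threshold = capacity >> UTIL_THRESHOLD_SHIFT;  /* 16 */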
@@ -137,9 +183,11 @@ struct teo_bin {
* @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
- * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
+ * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @next_recent_idx: Index of the next @recent_idx entry to update.
* @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @util_threshold: Threshold above which the CPU is considered utilized.
+ * @utilized: Whether the last sleep on the CPU happened while utilized.
*/
struct teo_cpu {
s64 time_span_ns;
@@ -148,11 +196,30 @@ struct teo_cpu {
unsigned int total;
int next_recent_idx;
int recent_idx[NR_RECENT];
+ unsigned long util_threshold;
+ bool utilized;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
/**
+ * teo_cpu_is_utilized - Check if the CPU's util is above the threshold
+ * @cpu: Target CPU
+ * @cpu_data: Governor CPU data for the target CPU
+ */
+#ifdef CONFIG_SMP
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return sched_cpu_util(cpu) > cpu_data->util_threshold;
+}
+#else
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return false;
+}
+#endif
+
+/**
* teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
@@ -258,15 +325,17 @@ static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_ns: Idle duration value to match.
+ * @no_poll: Don't consider polling states.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- s64 duration_ns)
+ s64 duration_ns, bool no_poll)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
- if (dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable ||
+ (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
continue;
state_idx = i;
@@ -321,6 +390,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
goto end;
}
+ cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+ /*
+ * If the CPU is being utilized over the threshold and there are only 2
+ * states to choose from, the metrics need not be considered, so choose
+ * the shallowest non-polling state and exit.
+ */
+ if (drv->state_count < 3 && cpu_data->utilized) {
+ for (i = 0; i < drv->state_count; ++i) {
+ if (!dev->states_usage[i].disable &&
+ !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
+ idx = i;
+ goto end;
+ }
+ }
+ }
+
/*
* Find the deepest idle state whose target residency does not exceed
* the current sleep length and the deepest idle state not deeper than
@@ -452,6 +537,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx > constraint_idx)
idx = constraint_idx;
+ /*
+ * If the CPU is being utilized over the threshold, choose a shallower
+ * non-polling state to improve latency.
+ */
+ if (cpu_data->utilized)
+ idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+
end:
/*
* Don't stop the tick if the selected state is a polling one or if the
@@ -469,7 +561,7 @@ end:
*/
if (idx > idx0 &&
drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
}
return idx;
@@ -508,9 +600,11 @@ static int teo_enable_device(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu);
int i;
memset(cpu_data, 0, sizeof(*cpu_data));
+ cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT;
for (i = 0; i < NR_RECENT; i++)
cpu_data->recent_idx[i] = -1;
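
Taken together, the enable-time and select-time hunks above implement a two-step policy. Condensed into one place it looks roughly like this (an illustrative sketch that omits the two-state fast path, not the actual control flow):

    /* At enable time: precompute the per-CPU threshold. */
    cpu_data->util_threshold = arch_scale_cpu_capacity(dev->cpu) >> UTIL_THRESHOLD_SHIFT;

    /* At selection time: if utilized, step down to a shallower non-polling state. */
    cpu_data->utilized = sched_cpu_util(dev->cpu) > cpu_data->util_threshold;
    if (cpu_data->utilized)
        idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);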
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 2b496a53cbca..48948b171749 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -200,7 +200,7 @@ static void cpuidle_sysfs_release(struct kobject *kobj)
complete(&kdev->kobj_unregister);
}
-static struct kobj_type ktype_cpuidle = {
+static const struct kobj_type ktype_cpuidle = {
.sysfs_ops = &cpuidle_sysfs_ops,
.release = cpuidle_sysfs_release,
};
@@ -447,7 +447,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
complete(&state_obj->kobj_unregister);
}
-static struct kobj_type ktype_state_cpuidle = {
+static const struct kobj_type ktype_state_cpuidle = {
.sysfs_ops = &cpuidle_state_sysfs_ops,
.default_groups = cpuidle_state_default_groups,
.release = cpuidle_state_sysfs_release,
@@ -594,7 +594,7 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
};
ATTRIBUTE_GROUPS(cpuidle_driver_default);
-static struct kobj_type ktype_driver_cpuidle = {
+static const struct kobj_type ktype_driver_cpuidle = {
.sysfs_ops = &cpuidle_driver_sysfs_ops,
.default_groups = cpuidle_driver_default_groups,
.release = cpuidle_driver_sysfs_release,
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cfeb24d40d37..bb3d10099ba4 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1430,6 +1430,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
@@ -1862,6 +1863,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
skx_idle_state_table_update();
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
spr_idle_state_table_update();
break;
case INTEL_FAM6_ALDERLAKE:
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index fe86a09e3b67..c03b5402c03b 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -155,10 +155,12 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev,
unsigned int run_duration_us,
unsigned int idle_duration_us)
{
- if (run_duration_us && idle_duration_us) {
+ if (run_duration_us + idle_duration_us) {
WRITE_ONCE(ii_dev->run_duration_us, run_duration_us);
WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us);
}
+ if (!run_duration_us)
+ pr_debug("CPU is forced to 100 percent idle\n");
}
/**
@@ -201,7 +203,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev)
unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us);
- if (!idle_duration_us || !run_duration_us)
+ if (!(idle_duration_us + run_duration_us))
return -EINVAL;
pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 26d00b1853b4..8970c7b80884 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -999,7 +999,15 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
do_div(value, rp->time_unit);
y = ilog2(value);
- f = div64_u64(4 * (value - (1 << y)), 1 << y);
+
+ /*
+ * The target hardware field is 7 bits wide, so return all ones
+ * if the exponent is too large.
+ */
+ if (y > 0x1f)
+ return 0x7f;
+
+ f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
value = (y & 0x1f) | ((f & 0x3) << 5);
}
return value;
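
The Y/F format stores a time window as roughly (1 << y) * (4 + f) / 4 time units, with y in the low five bits and f in the next two. A worked example of the fixed computation as standalone C (mirroring the kernel math, with __builtin_clzll() standing in for ilog2()):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t v = 10;                                     /* 10 time units */
        uint64_t y = 63 - __builtin_clzll(v);                /* ilog2(10) = 3 */
        uint64_t f = (4 * (v - (1ULL << y))) / (1ULL << y);  /* 4*2/8 = 1 */
        uint64_t encoded = (y & 0x1f) | ((f & 0x3) << 5);    /* 0x23 */

        printf("encoded = 0x%llx\n", (unsigned long long)encoded);
        return 0;
    }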
@@ -1113,7 +1121,10 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 1f968353d479..e180dee0f83d 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -530,9 +530,6 @@ struct powercap_zone *powercap_register_zone(
power_zone->name = kstrdup(name, GFP_KERNEL);
if (!power_zone->name)
goto err_name_alloc;
- dev_set_name(&power_zone->dev, "%s:%x",
- dev_name(power_zone->dev.parent),
- power_zone->id);
power_zone->constraints = kcalloc(nr_constraints,
sizeof(*power_zone->constraints),
GFP_KERNEL);
@@ -555,9 +552,16 @@ struct powercap_zone *powercap_register_zone(
power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group;
power_zone->dev_attr_groups[1] = NULL;
power_zone->dev.groups = power_zone->dev_attr_groups;
+ dev_set_name(&power_zone->dev, "%s:%x",
+ dev_name(power_zone->dev.parent),
+ power_zone->id);
result = device_register(&power_zone->dev);
- if (result)
- goto err_dev_ret;
+ if (result) {
+ put_device(&power_zone->dev);
+ mutex_unlock(&control_type->lock);
+
+ return ERR_PTR(result);
+ }
control_type->nr_zones++;
mutex_unlock(&control_type->lock);
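
Both changes follow the driver-core contract: dev_set_name() has to run before device_register(), and once device_register() has been called the structure is owned by its kobject, so a failed registration must be unwound with put_device() (which ends up in the release callback) rather than freed directly. The general shape of the pairing, with a hypothetical my_zone type:

    #include <linux/device.h>
    #include <linux/slab.h>

    struct my_zone {
        struct device dev;
        char *name;
    };

    /* Release callback: the only place the zone memory is freed. */
    static void my_zone_release(struct device *dev)
    {
        struct my_zone *zone = container_of(dev, struct my_zone, dev);

        kfree(zone->name);
        kfree(zone);
    }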
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index c5614444031f..6b487a5bd638 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -108,12 +108,14 @@ struct cppc_perf_caps {
u32 lowest_nonlinear_perf;
u32 lowest_freq;
u32 nominal_freq;
+ u32 energy_perf;
};
struct cppc_perf_ctrls {
u32 max_perf;
u32 min_perf;
u32 desired_perf;
+ u32 energy_perf;
};
struct cppc_perf_fb_ctrs {
@@ -149,6 +151,8 @@ extern bool cpc_ffh_supported(void);
extern bool cpc_supported_by_cpu(void);
extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val);
extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val);
+extern int cppc_get_epp_perf(int cpunum, u64 *epp_perf);
+extern int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable);
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
{
@@ -202,6 +206,14 @@ static inline int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
{
return -ENOTSUPP;
}
+static inline int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
+{
+ return -ENOTSUPP;
+}
+static inline int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
+{
+ return -ENOTSUPP;
+}
#endif /* !CONFIG_ACPI_CPPC_LIB */
#endif /* _CPPC_ACPI_H*/
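
With the !CONFIG_ACPI_CPPC_LIB stubs in place, callers can use the new EPP helpers unconditionally and simply see -ENOTSUPP when the CPPC library is not built in. A hypothetical caller sketch:

    /* Hypothetical helper: set an EPP value through the CPPC library. */
    static int my_update_epp(int cpu, u32 epp)
    {
        struct cppc_perf_ctrls ctrls = { .energy_perf = epp };

        return cppc_set_epp_perf(cpu, &ctrls, true);
    }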
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 1c4b8659f171..f5f22418e64b 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -12,6 +12,11 @@
#include <linux/pm_qos.h>
+#define AMD_CPPC_EPP_PERFORMANCE 0x00
+#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
+#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
+#define AMD_CPPC_EPP_POWERSAVE 0xFF
+
/*********************************************************************
* AMD P-state INTERFACE *
*********************************************************************/
@@ -47,6 +52,10 @@ struct amd_aperf_mperf {
* @prev: Last Aperf/Mperf/tsc count value read from register
* @freq: current cpu frequency value
* @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @cppc_cap1_cached: Cached MSR_AMD_CPPC_CAP1 register value
*
* The amd_cpudata is key private data for each CPU thread in AMD P-State, and
* represents all the attributes and goals that AMD P-State requests at runtime.
@@ -72,6 +81,29 @@ struct amd_cpudata {
u64 freq;
bool boost_supported;
+
+ /* EPP feature related attributes */
+ s16 epp_policy;
+ s16 epp_cached;
+ u32 policy;
+ u64 cppc_cap1_cached;
+ bool suspended;
};
+/*
+ * enum amd_pstate_mode - driver working modes of amd-pstate
+ */
+enum amd_pstate_mode {
+ AMD_PSTATE_DISABLE = 0,
+ AMD_PSTATE_PASSIVE,
+ AMD_PSTATE_ACTIVE,
+ AMD_PSTATE_MAX,
+};
+
+static const char * const amd_pstate_mode_string[] = {
+ [AMD_PSTATE_DISABLE] = "disable",
+ [AMD_PSTATE_PASSIVE] = "passive",
+ [AMD_PSTATE_ACTIVE] = "active",
+ NULL,
+};
#endif /* _LINUX_AMD_PSTATE_H */
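
The enum and the string table are laid out so that a mode name read from sysfs or the command line can be mapped back to the enum by index. A sketch of such a lookup (a hypothetical helper; the driver's own parser may differ):

    /* Hypothetical: map "disable"/"passive"/"active" to enum amd_pstate_mode. */
    static int get_mode_idx_from_str(const char *str, size_t size)
    {
        int i;

        for (i = 0; i < AMD_PSTATE_MAX; i++) {
            if (!strncmp(str, amd_pstate_mode_string[i], size))
                return i;
        }
        return -EINVAL;
    }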
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 6a94a6eaad27..65623233ab2f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -448,7 +448,7 @@ struct cpufreq_driver {
#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING BIT(6)
int cpufreq_register_driver(struct cpufreq_driver *driver_data);
-int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
+void cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
bool cpufreq_driver_test_flags(u16 flags);
const char *cpufreq_get_current_driver(void);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 93cd34f00822..035d9649eba4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -379,9 +379,13 @@ const struct dev_pm_ops name = { \
const struct dev_pm_ops name; \
__EXPORT_SYMBOL(name, sec, ns); \
const struct dev_pm_ops name
+#define EXPORT_PM_FN_GPL(name) EXPORT_SYMBOL_GPL(name)
+#define EXPORT_PM_FN_NS_GPL(name, ns) EXPORT_SYMBOL_NS_GPL(name, ns)
#else
#define _EXPORT_DEV_PM_OPS(name, sec, ns) \
static __maybe_unused const struct dev_pm_ops __static_##name
+#define EXPORT_PM_FN_GPL(name)
+#define EXPORT_PM_FN_NS_GPL(name, ns)
#endif
#define EXPORT_DEV_PM_OPS(name) _EXPORT_DEV_PM_OPS(name, "", "")
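
The new EXPORT_PM_FN_GPL()/EXPORT_PM_FN_NS_GPL() macros do for plain PM helper functions what the dev_pm_ops macros already do for ops tables: the export exists on CONFIG_PM builds and compiles away otherwise. A hypothetical usage sketch:

    /* Hypothetical PM helper; the export disappears when CONFIG_PM is off. */
    int my_driver_resume_helper(struct device *dev)
    {
        return 0;
    }
    EXPORT_PM_FN_GPL(my_driver_resume_helper);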
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 60a1d3051cc7..4b31629c5be4 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -118,7 +118,6 @@ config PM_SLEEP
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
select PM
- select SRCU
config PM_SLEEP_SMP
def_bool y
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index f82111837b8d..7b44f5b89fa1 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -87,10 +87,7 @@ static void em_debug_create_pd(struct device *dev)
static void em_debug_remove_pd(struct device *dev)
{
- struct dentry *debug_dir;
-
- debug_dir = debugfs_lookup(dev_name(dev), rootdir);
- debugfs_remove_recursive(debug_dir);
+ debugfs_lookup_and_remove(dev_name(dev), rootdir);
}
static int __init em_debug_init(void)
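
debugfs_lookup() returns the dentry with an extra reference that the old code never dropped; debugfs_lookup_and_remove() performs the lookup, the removal and the reference drop in one call. Fixed by hand, the open-coded equivalent would have looked like this (sketch):

    struct dentry *debug_dir = debugfs_lookup(dev_name(dev), rootdir);

    debugfs_remove_recursive(debug_dir);
    dput(debug_dir);    /* drop the reference debugfs_lookup() took */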
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 277434b6c0bf..36a1df48280c 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -581,7 +581,7 @@ static int save_image(struct swap_map_handle *handle,
return ret;
}
-/**
+/*
* Structure used for CRC32.
*/
struct crc_data {
@@ -596,7 +596,7 @@ struct crc_data {
unsigned char *unc[LZO_THREADS]; /* uncompressed data */
};
-/**
+/*
* CRC32 update function that runs in its own thread.
*/
static int crc32_threadfn(void *data)
@@ -623,7 +623,7 @@ static int crc32_threadfn(void *data)
}
return 0;
}
-/**
+/*
* Structure used for LZO data compression.
*/
struct cmp_data {
@@ -640,7 +640,7 @@ struct cmp_data {
unsigned char wrk[LZO1X_1_MEM_COMPRESS]; /* compression workspace */
};
-/**
+/*
* Compression function that runs in its own thread.
*/
static int lzo_compress_threadfn(void *data)
@@ -948,9 +948,9 @@ out_finish:
return error;
}
-/**
+/*
* The following functions allow us to read data using a swap map
- * in a file-alike way
+ * in a file-like way.
*/
static void release_swap_reader(struct swap_map_handle *handle)
@@ -1107,7 +1107,7 @@ static int load_image(struct swap_map_handle *handle,
return ret;
}
-/**
+/*
* Structure used for LZO data decompression.
*/
struct dec_data {
@@ -1123,7 +1123,7 @@ struct dec_data {
unsigned char cmp[LZO_CMP_SIZE]; /* compressed buffer */
};
-/**
+/*
* Decompression function that runs in its own thread.
*/
static int lzo_decompress_threadfn(void *data)