aboutsummaryrefslogtreecommitdiff
path: root/drivers/idle/intel_idle.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/idle/intel_idle.c')
-rw-r--r--drivers/idle/intel_idle.c275
1 files changed, 233 insertions, 42 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b7640cfe0020..3e101719689a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -47,13 +47,16 @@
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
+#include <asm/fpu/api.h>
#define INTEL_IDLE_VERSION "0.5.1"
@@ -69,7 +72,12 @@ static unsigned int preferred_states_mask;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static unsigned long auto_demotion_disable_flags;
-static bool disable_promotion_to_c1e;
+
+static enum {
+ C1E_PROMOTION_PRESERVE,
+ C1E_PROMOTION_ENABLE,
+ C1E_PROMOTION_DISABLE
+} c1e_promotion = C1E_PROMOTION_PRESERVE;
struct idle_cpu {
struct cpuidle_state *state_table;
@@ -101,6 +109,17 @@ static unsigned int mwait_substates __initdata;
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
+/*
+ * Initialize large xstate for the C6-state entrance.
+ */
+#define CPUIDLE_FLAG_INIT_XSTATE BIT(17)
+
+/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
@@ -110,6 +129,18 @@ static unsigned int mwait_substates __initdata;
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+static __always_inline int __intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+ unsigned long ecx = 1; /* break on interrupt flag */
+
+ mwait_idle_with_hints(eax, ecx);
+
+ return index;
+}
+
/**
* intel_idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -127,16 +158,50 @@ static unsigned int mwait_substates __initdata;
static __cpuidle int intel_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
- unsigned long ecx = 1; /* break on interrupt flag */
+ return __intel_idle(dev, drv, index);
+}
- if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)
- local_irq_enable();
+static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
- mwait_idle_with_hints(eax, ecx);
+ raw_local_irq_enable();
+ ret = __intel_idle(dev, drv, index);
- return index;
+ /*
+ * The lockdep hardirqs state may be changed to 'on' with timer
+ * tick interrupt followed by __do_softirq(). Use local_irq_disable()
+ * to keep the hardirqs state correct.
+ */
+ local_irq_disable();
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = __intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ fpu_idle_fpregs();
+ return __intel_idle(dev, drv, index);
}
/**
@@ -154,8 +219,12 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev,
static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- unsigned long eax = flg2MWAIT(drv->states[index].flags);
unsigned long ecx = 1; /* break on interrupt flag */
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE)
+ fpu_idle_fpregs();
mwait_idle_with_hints(eax, ecx);
@@ -660,7 +729,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
@@ -668,7 +737,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
@@ -676,7 +745,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
@@ -684,7 +753,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
@@ -692,7 +761,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
@@ -721,7 +790,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
@@ -760,15 +829,105 @@ static struct cpuidle_state icx_cstates[] __initdata = {
};
/*
- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
- * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+ * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa.
+ * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL.
+ * But in this case there is effectively no C1, because C1 requests are
+ * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1
+ * and C1E requests end up with C1, so there is effectively no C1E.
*
- * By default we enable C1 and disable C1E by marking it with
+ * By default we enable C1E and disable C1 by marking it with
* 'CPUIDLE_FLAG_UNUSABLE'.
*/
+static struct cpuidle_state adl_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 220,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 280,
+ .target_residency = 800,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 680,
+ .target_residency = 2000,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
+static struct cpuidle_state adl_l_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "MWAIT 0x00",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C1E",
+ .desc = "MWAIT 0x01",
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+ .exit_latency = 2,
+ .target_residency = 4,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C6",
+ .desc = "MWAIT 0x20",
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 170,
+ .target_residency = 500,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C8",
+ .desc = "MWAIT 0x40",
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 200,
+ .target_residency = 600,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .name = "C10",
+ .desc = "MWAIT 0x60",
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 230,
+ .target_residency = 700,
+ .enter = &intel_idle,
+ .enter_s2idle = intel_idle_s2idle, },
+ {
+ .enter = NULL }
+};
+
static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C1",
@@ -781,8 +940,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C1E",
.desc = "MWAIT 0x01",
- .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
- CPUIDLE_FLAG_UNUSABLE,
+ .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
.exit_latency = 2,
.target_residency = 4,
.enter = &intel_idle,
@@ -790,7 +948,8 @@ static struct cpuidle_state spr_cstates[] __initdata = {
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED |
+ CPUIDLE_FLAG_INIT_XSTATE,
.exit_latency = 290,
.target_residency = 800,
.enter = &intel_idle,
@@ -1142,6 +1301,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = {
.use_acpi = true,
};
+static const struct idle_cpu idle_cpu_adl __initconst = {
+ .state_table = adl_cstates,
+};
+
+static const struct idle_cpu idle_cpu_adl_l __initconst = {
+ .state_table = adl_l_cstates,
+};
+
static const struct idle_cpu idle_cpu_spr __initconst = {
.state_table = spr_cstates,
.disable_promotion_to_c1e = true,
@@ -1210,6 +1377,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
@@ -1398,8 +1567,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { }
static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; }
#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */
-static void c1e_promotion_enable(void);
-
/**
* ivt_idle_state_table_update - Tune the idle states table for Ivy Town.
*
@@ -1571,25 +1738,30 @@ static void __init skx_idle_state_table_update(void)
}
/**
- * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ * adl_idle_state_table_update - Adjust AlderLake idle states table.
*/
-static void __init spr_idle_state_table_update(void)
+static void __init adl_idle_state_table_update(void)
{
- unsigned long long msr;
+ /* Check if user prefers C1 over C1E. */
+ if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) {
+ cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE;
- /* Check if user prefers C1E over C1. */
- if (preferred_states_mask & BIT(2)) {
- if (preferred_states_mask & BIT(1))
- /* Both can't be enabled, stick to the defaults. */
- return;
+ /* Disable C1E by clearing the "C1E promotion" bit. */
+ c1e_promotion = C1E_PROMOTION_DISABLE;
+ return;
+ }
- spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
- spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+ /* Make sure C1E is enabled by default */
+ c1e_promotion = C1E_PROMOTION_ENABLE;
+}
- /* Enable C1E using the "C1E promotion" bit. */
- c1e_promotion_enable();
- disable_promotion_to_c1e = false;
- }
+/**
+ * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table.
+ */
+static void __init spr_idle_state_table_update(void)
+{
+ unsigned long long msr;
/*
* By default, the C6 state assumes the worst-case scenario of package
@@ -1642,6 +1814,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
case INTEL_FAM6_SAPPHIRERAPIDS_X:
spr_idle_state_table_update();
break;
+ case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
+ adl_idle_state_table_update();
+ break;
}
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
@@ -1668,6 +1844,18 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+ drv->states[drv->state_count].enter = intel_idle_irq;
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE)
+ drv->states[drv->state_count].enter = intel_idle_xstate;
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
@@ -1754,7 +1942,9 @@ static int intel_idle_cpu_init(unsigned int cpu)
if (auto_demotion_disable_flags)
auto_demotion_disable();
- if (disable_promotion_to_c1e)
+ if (c1e_promotion == C1E_PROMOTION_ENABLE)
+ c1e_promotion_enable();
+ else if (c1e_promotion == C1E_PROMOTION_DISABLE)
c1e_promotion_disable();
return 0;
@@ -1833,7 +2023,8 @@ static int __init intel_idle_init(void)
if (icpu) {
cpuidle_state_table = icpu->state_table;
auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
- disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
+ if (icpu->disable_promotion_to_c1e)
+ c1e_promotion = C1E_PROMOTION_DISABLE;
if (icpu->use_acpi || force_use_acpi)
intel_idle_acpi_cst_extract();
} else if (!intel_idle_acpi_cst_extract()) {