diff options
Diffstat (limited to 'drivers/idle/intel_idle.c')
| -rw-r--r-- | drivers/idle/intel_idle.c | 275 |
1 files changed, 233 insertions, 42 deletions
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b7640cfe0020..3e101719689a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -47,13 +47,16 @@ #include <linux/tick.h> #include <trace/events/power.h> #include <linux/sched.h> +#include <linux/sched/smt.h> #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/moduleparam.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/nospec-branch.h> #include <asm/mwait.h> #include <asm/msr.h> +#include <asm/fpu/api.h> #define INTEL_IDLE_VERSION "0.5.1" @@ -69,7 +72,12 @@ static unsigned int preferred_states_mask; static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static unsigned long auto_demotion_disable_flags; -static bool disable_promotion_to_c1e; + +static enum { + C1E_PROMOTION_PRESERVE, + C1E_PROMOTION_ENABLE, + C1E_PROMOTION_DISABLE +} c1e_promotion = C1E_PROMOTION_PRESERVE; struct idle_cpu { struct cpuidle_state *state_table; @@ -101,6 +109,17 @@ static unsigned int mwait_substates __initdata; #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) /* + * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE + * above. + */ +#define CPUIDLE_FLAG_IBRS BIT(16) + +/* + * Initialize large xstate for the C6-state entrance. + */ +#define CPUIDLE_FLAG_INIT_XSTATE BIT(17) + +/* * MWAIT takes an 8-bit "hint" in EAX "suggesting" * the C-state (top nibble) and sub-state (bottom nibble) * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. @@ -110,6 +129,18 @@ static unsigned int mwait_substates __initdata; #define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) #define MWAIT2flg(eax) ((eax & 0xFF) << 24) +static __always_inline int __intel_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + unsigned long ecx = 1; /* break on interrupt flag */ + + mwait_idle_with_hints(eax, ecx); + + return index; +} + /** * intel_idle - Ask the processor to enter the given idle state. * @dev: cpuidle device of the target CPU. @@ -127,16 +158,50 @@ static unsigned int mwait_substates __initdata; static __cpuidle int intel_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - struct cpuidle_state *state = &drv->states[index]; - unsigned long eax = flg2MWAIT(state->flags); - unsigned long ecx = 1; /* break on interrupt flag */ + return __intel_idle(dev, drv, index); +} - if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) - local_irq_enable(); +static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int ret; - mwait_idle_with_hints(eax, ecx); + raw_local_irq_enable(); + ret = __intel_idle(dev, drv, index); - return index; + /* + * The lockdep hardirqs state may be changed to 'on' with timer + * tick interrupt followed by __do_softirq(). Use local_irq_disable() + * to keep the hardirqs state correct. + */ + local_irq_disable(); + + return ret; +} + +static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + bool smt_active = sched_smt_active(); + u64 spec_ctrl = spec_ctrl_current(); + int ret; + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + + ret = __intel_idle(dev, drv, index); + + if (smt_active) + wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); + + return ret; +} + +static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + fpu_idle_fpregs(); + return __intel_idle(dev, drv, index); } /** @@ -154,8 +219,12 @@ static __cpuidle int intel_idle(struct cpuidle_device *dev, static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - unsigned long eax = flg2MWAIT(drv->states[index].flags); unsigned long ecx = 1; /* break on interrupt flag */ + struct cpuidle_state *state = &drv->states[index]; + unsigned long eax = flg2MWAIT(state->flags); + + if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) + fpu_idle_fpregs(); mwait_idle_with_hints(eax, ecx); @@ -660,7 +729,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, @@ -668,7 +737,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C7s", .desc = "MWAIT 0x33", - .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, .target_residency = 800, .enter = &intel_idle, @@ -676,7 +745,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C8", .desc = "MWAIT 0x40", - .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, @@ -684,7 +753,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C9", .desc = "MWAIT 0x50", - .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, .target_residency = 5000, .enter = &intel_idle, @@ -692,7 +761,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { { .name = "C10", .desc = "MWAIT 0x60", - .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, .target_residency = 5000, .enter = &intel_idle, @@ -721,7 +790,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 133, .target_residency = 600, .enter = &intel_idle, @@ -760,15 +829,105 @@ static struct cpuidle_state icx_cstates[] __initdata = { }; /* - * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice - * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in - * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 - * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then - * both C1 and C1E requests end up with C1, so there is effectively no C1E. + * On AlderLake C1 has to be disabled if C1E is enabled, and vice versa. + * C1E is enabled only if "C1E promotion" bit is set in MSR_IA32_POWER_CTL. + * But in this case there is effectively no C1, because C1 requests are + * promoted to C1E. If the "C1E promotion" bit is cleared, then both C1 + * and C1E requests end up with C1, so there is effectively no C1E. * - * By default we enable C1 and disable C1E by marking it with + * By default we enable C1E and disable C1 by marking it with * 'CPUIDLE_FLAG_UNUSABLE'. */ +static struct cpuidle_state adl_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 220, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C8", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 280, + .target_residency = 800, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 680, + .target_residency = 2000, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + +static struct cpuidle_state adl_l_cstates[] __initdata = { + { + .name = "C1", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_UNUSABLE, + .exit_latency = 1, + .target_residency = 1, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C1E", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, + .exit_latency = 2, + .target_residency = 4, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C6", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 170, + .target_residency = 500, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C8", + .desc = "MWAIT 0x40", + .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 200, + .target_residency = 600, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .name = "C10", + .desc = "MWAIT 0x60", + .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 230, + .target_residency = 700, + .enter = &intel_idle, + .enter_s2idle = intel_idle_s2idle, }, + { + .enter = NULL } +}; + static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1", @@ -781,8 +940,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | - CPUIDLE_FLAG_UNUSABLE, + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, .enter = &intel_idle, @@ -790,7 +948,8 @@ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C6", .desc = "MWAIT 0x20", - .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | + CPUIDLE_FLAG_INIT_XSTATE, .exit_latency = 290, .target_residency = 800, .enter = &intel_idle, @@ -1142,6 +1301,14 @@ static const struct idle_cpu idle_cpu_icx __initconst = { .use_acpi = true, }; +static const struct idle_cpu idle_cpu_adl __initconst = { + .state_table = adl_cstates, +}; + +static const struct idle_cpu idle_cpu_adl_l __initconst = { + .state_table = adl_l_cstates, +}; + static const struct idle_cpu idle_cpu_spr __initconst = { .state_table = spr_cstates, .disable_promotion_to_c1e = true, @@ -1210,6 +1377,8 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &idle_cpu_adl), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), @@ -1398,8 +1567,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -static void c1e_promotion_enable(void); - /** * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. * @@ -1571,25 +1738,30 @@ static void __init skx_idle_state_table_update(void) } /** - * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + * adl_idle_state_table_update - Adjust AlderLake idle states table. */ -static void __init spr_idle_state_table_update(void) +static void __init adl_idle_state_table_update(void) { - unsigned long long msr; + /* Check if user prefers C1 over C1E. */ + if (preferred_states_mask & BIT(1) && !(preferred_states_mask & BIT(2))) { + cpuidle_state_table[0].flags &= ~CPUIDLE_FLAG_UNUSABLE; + cpuidle_state_table[1].flags |= CPUIDLE_FLAG_UNUSABLE; - /* Check if user prefers C1E over C1. */ - if (preferred_states_mask & BIT(2)) { - if (preferred_states_mask & BIT(1)) - /* Both can't be enabled, stick to the defaults. */ - return; + /* Disable C1E by clearing the "C1E promotion" bit. */ + c1e_promotion = C1E_PROMOTION_DISABLE; + return; + } - spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; - spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; + /* Make sure C1E is enabled by default */ + c1e_promotion = C1E_PROMOTION_ENABLE; +} - /* Enable C1E using the "C1E promotion" bit. */ - c1e_promotion_enable(); - disable_promotion_to_c1e = false; - } +/** + * spr_idle_state_table_update - Adjust Sapphire Rapids idle states table. + */ +static void __init spr_idle_state_table_update(void) +{ + unsigned long long msr; /* * By default, the C6 state assumes the worst-case scenario of package @@ -1642,6 +1814,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) case INTEL_FAM6_SAPPHIRERAPIDS_X: spr_idle_state_table_update(); break; + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + adl_idle_state_table_update(); + break; } for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { @@ -1668,6 +1844,18 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; + if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) + drv->states[drv->state_count].enter = intel_idle_irq; + + if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && + cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { + WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); + drv->states[drv->state_count].enter = intel_idle_ibrs; + } + + if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_INIT_XSTATE) + drv->states[drv->state_count].enter = intel_idle_xstate; + if ((disabled_states_mask & BIT(drv->state_count)) || ((icpu->use_acpi || force_use_acpi) && intel_idle_off_by_default(mwait_hint) && @@ -1754,7 +1942,9 @@ static int intel_idle_cpu_init(unsigned int cpu) if (auto_demotion_disable_flags) auto_demotion_disable(); - if (disable_promotion_to_c1e) + if (c1e_promotion == C1E_PROMOTION_ENABLE) + c1e_promotion_enable(); + else if (c1e_promotion == C1E_PROMOTION_DISABLE) c1e_promotion_disable(); return 0; @@ -1833,7 +2023,8 @@ static int __init intel_idle_init(void) if (icpu) { cpuidle_state_table = icpu->state_table; auto_demotion_disable_flags = icpu->auto_demotion_disable_flags; - disable_promotion_to_c1e = icpu->disable_promotion_to_c1e; + if (icpu->disable_promotion_to_c1e) + c1e_promotion = C1E_PROMOTION_DISABLE; if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { |