From 9f3d6daf61e5156139cd05643f7f1c2a9b7b49b0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:55:52 +0100 Subject: intel_idle: Refactor intel_idle_cpuidle_driver_init() Move the C-state verification and checks from intel_idle_cpuidle_driver_init() to a separate function, intel_idle_verify_cstate(), and make the former call it after checking the CPUIDLE_FLAG_UNUSABLE state flag. Also combine the drv->states[] updates with the incrementation of drv->state_count. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 49 ++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 24 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 75fd2a7b0842..47255d3cf51f 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -944,6 +944,22 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, mwait_idle_with_hints(eax, ecx); } +static bool intel_idle_verify_cstate(unsigned int mwait_hint) +{ + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & + MWAIT_SUBSTATE_MASK; + + /* Ignore the C-state if there are NO sub-states in CPUID for it. */ + if (num_substates == 0) + return false; + + if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle states deeper than C2"); + + return true; +} + static void __setup_broadcast_timer(bool on) { if (on) @@ -1332,10 +1348,10 @@ static void __init intel_idle_cpuidle_driver_init(void) drv->state_count = 1; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { - int num_substates, mwait_hint, mwait_cstate; + unsigned int mwait_hint; - if ((cpuidle_state_table[cstate].enter == NULL) && - (cpuidle_state_table[cstate].enter_s2idle == NULL)) + if (!cpuidle_state_table[cstate].enter && + !cpuidle_state_table[cstate].enter_s2idle) break; if (cstate + 1 > max_cstate) { @@ -1343,34 +1359,19 @@ static void __init intel_idle_cpuidle_driver_init(void) break; } - mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); - mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint); - - /* number of sub-states for this state in CPUID.MWAIT */ - num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4)) - & MWAIT_SUBSTATE_MASK; - - /* if NO sub-states for this state in CPUID, skip it */ - if (num_substates == 0) - continue; - - /* if state marked as disabled, skip it */ + /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { pr_debug("state %s is disabled\n", cpuidle_state_table[cstate].name); continue; } + mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags); + if (!intel_idle_verify_cstate(mwait_hint)) + continue; - if (((mwait_cstate + 1) > 2) && - !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halts in idle" - " states deeper than C2"); - - drv->states[drv->state_count] = /* structure copy */ - cpuidle_state_table[cstate]; - - drv->state_count += 1; + /* Structure copy. */ + drv->states[drv->state_count++] = cpuidle_state_table[cstate]; } if (icpu->byt_auto_demotion_disable_flag) { -- cgit From 18734958e9bfbc055805d110a38dc76307eba742 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:01 +0100 Subject: intel_idle: Use ACPI _CST for processor models without C-state tables Modify the intel_idle driver to get the C-states information from ACPI _CST if the processor model is not recognized by it. The processor is still required to support MWAIT and the information from ACPI _CST will only be used if all of the C-states listed by _CST are of the ACPI_CSTATE_FFH type (which means that they are expected to be entered via MWAIT). Moreover, the driver assumes that the _CST information is the same for all CPUs in the system, so it is sufficient to evaluate _CST for one of them and extract the common list of C-states from there. Also _CST is evaluated once at the system initialization time and the driver does not respond to _CST change notifications (that can be changed in the future). The main functional difference between intel_idle with this change and the ACPI processor driver is that the former sets the target residency to be equal to the exit latency (provided by _CST) for C1-type C-states and to 3 times the exit latency value for the other C-state types, whereas the latter obtains the target residency by multiplying the exit latency by the same number (2 by default) for all C-state types. Therefore it is expected that in general using the former instead of the latter on the same system will lead to improved energy-efficiency. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 190 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 162 insertions(+), 28 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 47255d3cf51f..28812d93d59a 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -41,6 +41,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -1111,6 +1112,129 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { {} }; +#define INTEL_CPU_FAM6_MWAIT \ + { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_MWAIT, 0 } + +static const struct x86_cpu_id intel_mwait_ids[] __initconst = { + INTEL_CPU_FAM6_MWAIT, + {} +}; + +static bool intel_idle_max_cstate_reached(int cstate) +{ + if (cstate + 1 > max_cstate) { + pr_info("max_cstate %d reached\n", max_cstate); + return true; + } + return false; +} + +#ifdef CONFIG_ACPI_PROCESSOR_CSTATE +#include + +static struct acpi_processor_power acpi_state_table; + +/** + * intel_idle_cst_usable - Check if the _CST information can be used. + * + * Check if all of the C-states listed by _CST in the max_cstate range are + * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. + */ +static bool intel_idle_cst_usable(void) +{ + int cstate, limit; + + limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), + acpi_state_table.count); + + for (cstate = 1; cstate < limit; cstate++) { + struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; + + if (cx->entry_method != ACPI_CSTATE_FFH) + return false; + } + + return true; +} + +static bool intel_idle_acpi_cst_extract(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct acpi_processor *pr = per_cpu(processors, cpu); + + if (!pr) + continue; + + if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) + continue; + + acpi_state_table.count++; + + if (!intel_idle_cst_usable()) + continue; + + if (!acpi_processor_claim_cst_control()) { + acpi_state_table.count = 0; + return false; + } + + return true; + } + + pr_debug("ACPI _CST not found or not usable\n"); + return false; +} + +static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) +{ + int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); + + /* + * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of + * the interesting states are ACPI_CSTATE_FFH. + */ + for (cstate = 1; cstate < limit; cstate++) { + struct acpi_processor_cx *cx; + struct cpuidle_state *state; + + if (intel_idle_max_cstate_reached(cstate)) + break; + + cx = &acpi_state_table.states[cstate]; + + state = &drv->states[drv->state_count++]; + + snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); + strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); + state->exit_latency = cx->latency; + /* + * For C1-type C-states use the same number for both the exit + * latency and target residency, because that is the case for + * C1 in the majority of the static C-states tables above. + * For the other types of C-states, however, set the target + * residency to 3 times the exit latency which should lead to + * a reasonable balance between energy-efficiency and + * performance in the majority of interesting cases. + */ + state->target_residency = cx->latency; + if (cx->type > ACPI_STATE_C1) + state->target_residency *= 3; + + state->flags = MWAIT2flg(cx->address); + if (cx->type > ACPI_STATE_C2) + state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + + state->enter = intel_idle; + state->enter_s2idle = intel_idle_s2idle; + } +} +#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +static inline bool intel_idle_acpi_cst_extract(void) { return false; } +static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } +#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ + /* * intel_idle_probe() */ @@ -1125,17 +1249,15 @@ static int __init intel_idle_probe(void) } id = x86_match_cpu(intel_idle_ids); - if (!id) { - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6) - pr_debug("does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); - return -ENODEV; - } - - if (!boot_cpu_has(X86_FEATURE_MWAIT)) { - pr_debug("Please enable MWAIT in BIOS SETUP\n"); - return -ENODEV; + if (id) { + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_debug("Please enable MWAIT in BIOS SETUP\n"); + return -ENODEV; + } + } else { + id = x86_match_cpu(intel_mwait_ids); + if (!id) + return -ENODEV; } if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) @@ -1151,7 +1273,10 @@ static int __init intel_idle_probe(void) pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; - cpuidle_state_table = icpu->state_table; + if (icpu) + cpuidle_state_table = icpu->state_table; + else if (!intel_idle_acpi_cst_extract()) + return -ENODEV; pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -1333,31 +1458,19 @@ static void intel_idle_state_table_update(void) } } -/* - * intel_idle_cpuidle_driver_init() - * allocate, initialize cpuidle_states - */ -static void __init intel_idle_cpuidle_driver_init(void) +static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; - struct cpuidle_driver *drv = &intel_idle_driver; - - intel_idle_state_table_update(); - - cpuidle_poll_state_init(drv); - drv->state_count = 1; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { unsigned int mwait_hint; - if (!cpuidle_state_table[cstate].enter && - !cpuidle_state_table[cstate].enter_s2idle) + if (intel_idle_max_cstate_reached(cstate)) break; - if (cstate + 1 > max_cstate) { - pr_info("max_cstate %d reached\n", max_cstate); + if (!cpuidle_state_table[cstate].enter && + !cpuidle_state_table[cstate].enter_s2idle) break; - } /* If marked as unusable, skip this state. */ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) { @@ -1380,6 +1493,24 @@ static void __init intel_idle_cpuidle_driver_init(void) } } +/* + * intel_idle_cpuidle_driver_init() + * allocate, initialize cpuidle_states + */ +static void __init intel_idle_cpuidle_driver_init(void) +{ + struct cpuidle_driver *drv = &intel_idle_driver; + + intel_idle_state_table_update(); + + cpuidle_poll_state_init(drv); + drv->state_count = 1; + + if (icpu) + intel_idle_init_cstates_icpu(drv); + else + intel_idle_init_cstates_acpi(drv); +} /* * intel_idle_cpu_init() @@ -1398,6 +1529,9 @@ static int intel_idle_cpu_init(unsigned int cpu) return -EIO; } + if (!icpu) + return 0; + if (icpu->auto_demotion_disable_flags) auto_demotion_disable(); -- cgit From bff8e60a86f4960133f90ad9add9adeb082b8154 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:21 +0100 Subject: intel_idle: Allow ACPI _CST to be used for selected known processors Update the intel_idle driver to get the C-states information from ACPI _CST in some cases in which the processor is known to the driver, as long as that information is available and the new use_acpi flag is set in the profile of the processor in question. In the cases when there is a specific table of C-states for the given processor in the driver, that table is used as the primary source of information on the available C-states, but if ACPI _CST is present, the C-states that are not listed by it will not be enabled by default (they still can be enabled later by user space via sysfs, though). The new CPUIDLE_FLAG_ALWAYS_ENABLE flag can be used for marking C-states that should be enabled by default even if they are not listed by ACPI _CST. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 28812d93d59a..a072b84d9595 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -80,6 +80,7 @@ struct idle_cpu { unsigned long auto_demotion_disable_flags; bool byt_auto_demotion_disable_flag; bool disable_promotion_to_c1e; + bool use_acpi; }; static const struct idle_cpu *icpu; @@ -90,6 +91,11 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index); static struct cpuidle_state *cpuidle_state_table; +/* + * Enable this state by default even if the ACPI _CST does not list it. + */ +#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + /* * Set this flag for states where the HW flushes the TLB for us * and so we don't need cross-calls to keep it consistent. @@ -1230,9 +1236,33 @@ static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) state->enter_s2idle = intel_idle_s2idle; } } + +static bool intel_idle_off_by_default(u32 mwait_hint) +{ + int cstate, limit; + + /* + * If there are no _CST C-states, do not disable any C-states by + * default. + */ + if (!acpi_state_table.count) + return false; + + limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); + /* + * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of + * the interesting states are ACPI_CSTATE_FFH. + */ + for (cstate = 1; cstate < limit; cstate++) { + if (acpi_state_table.states[cstate].address == mwait_hint) + return false; + } + return true; +} #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } +static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ /* @@ -1273,10 +1303,13 @@ static int __init intel_idle_probe(void) pr_debug("MWAIT substates: 0x%x\n", mwait_substates); icpu = (const struct idle_cpu *)id->driver_data; - if (icpu) + if (icpu) { cpuidle_state_table = icpu->state_table; - else if (!intel_idle_acpi_cst_extract()) + if (icpu->use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { return -ENODEV; + } pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", boot_cpu_data.x86_model); @@ -1484,7 +1517,13 @@ static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) continue; /* Structure copy. */ - drv->states[drv->state_count++] = cpuidle_state_table[cstate]; + drv->states[drv->state_count] = cpuidle_state_table[cstate]; + + if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) && + !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) + drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; + + drv->state_count++; } if (icpu->byt_auto_demotion_disable_flag) { -- cgit From 4ec32d9e8e5b6d6eb491eeee3938665d8a2388fa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:29 +0100 Subject: intel_idle: Add module parameter to prevent ACPI _CST from being used Add a new module parameter called "no_acpi" to the intel_idle driver to allow the driver to be prevented from using ACPI _CST via kernel command line. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index a072b84d9595..26fe383bb921 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1138,6 +1138,10 @@ static bool intel_idle_max_cstate_reached(int cstate) #ifdef CONFIG_ACPI_PROCESSOR_CSTATE #include +static bool no_acpi __read_mostly; +module_param(no_acpi, bool, 0444); +MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); + static struct acpi_processor_power acpi_state_table; /** @@ -1167,6 +1171,11 @@ static bool intel_idle_acpi_cst_extract(void) { unsigned int cpu; + if (no_acpi) { + pr_debug("Not allowed to use ACPI _CST\n"); + return false; + } + for_each_possible_cpu(cpu) { struct acpi_processor *pr = per_cpu(processors, cpu); -- cgit From e6d4f08a677654385869ba0c39d7c9ceec47e5c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Dec 2019 09:56:38 +0100 Subject: intel_idle: Use ACPI _CST on server systems In many cases, especially on server systems, it is desirable to avoid enabling C-states that have been disabled in the platform firmware (BIOS) setup, except for C1E. As a rule, the C-states disabled this way are not listed by ACPI _CST, so if that is used by intel_idle along with the specific table of C-states that it has for the given processor, the C-states disabled through the platform firmware will not be enabled by default by intel_idle. Accordingly, set the use_acpi flag (introduced previously) in all server processor profiles defined in intel_idle (so as to make it use ACPI _CST to decide which C-states to enable by default) and set the CPUIDLE_FLAG_ALWAYS_ENABLE flag (also introduced previously) for C1E in all C-states tables in intel_idle that contain C1 too (so that C1E is enabled regardless of whether or not it is listed by ACPI _CST). Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 70 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 20 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 26fe383bb921..1467490adfc3 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -131,7 +131,7 @@ static struct cpuidle_state nehalem_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -168,7 +168,7 @@ static struct cpuidle_state snb_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -303,7 +303,7 @@ static struct cpuidle_state ivb_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -348,7 +348,7 @@ static struct cpuidle_state ivt_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 80, .enter = &intel_idle, @@ -385,7 +385,7 @@ static struct cpuidle_state ivt_cstates_4s[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 250, .enter = &intel_idle, @@ -422,7 +422,7 @@ static struct cpuidle_state ivt_cstates_8s[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 500, .enter = &intel_idle, @@ -459,7 +459,7 @@ static struct cpuidle_state hsw_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -527,7 +527,7 @@ static struct cpuidle_state bdw_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -596,7 +596,7 @@ static struct cpuidle_state skl_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -665,7 +665,7 @@ static struct cpuidle_state skx_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -815,7 +815,7 @@ static struct cpuidle_state bxt_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -876,7 +876,7 @@ static struct cpuidle_state dnv_cstates[] = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01), + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, .target_residency = 20, .enter = &intel_idle, @@ -998,6 +998,13 @@ static const struct idle_cpu idle_cpu_nehalem = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_nhx = { + .state_table = nehalem_cstates, + .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_atom = { .state_table = atom_cstates, }; @@ -1016,6 +1023,12 @@ static const struct idle_cpu idle_cpu_snb = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_snx = { + .state_table = snb_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_byt = { .state_table = byt_cstates, .disable_promotion_to_c1e = true, @@ -1036,6 +1049,7 @@ static const struct idle_cpu idle_cpu_ivb = { static const struct idle_cpu idle_cpu_ivt = { .state_table = ivt_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_hsw = { @@ -1043,11 +1057,23 @@ static const struct idle_cpu idle_cpu_hsw = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_hsx = { + .state_table = hsw_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_bdw = { .state_table = bdw_cstates, .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_bdx = { + .state_table = bdw_cstates, + .disable_promotion_to_c1e = true, + .use_acpi = true, +}; + static const struct idle_cpu idle_cpu_skl = { .state_table = skl_cstates, .disable_promotion_to_c1e = true, @@ -1056,15 +1082,18 @@ static const struct idle_cpu idle_cpu_skl = { static const struct idle_cpu idle_cpu_skx = { .state_table = skx_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_avn = { .state_table = avn_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_knl = { .state_table = knl_cstates, + .use_acpi = true, }; static const struct idle_cpu idle_cpu_bxt = { @@ -1075,20 +1104,21 @@ static const struct idle_cpu idle_cpu_bxt = { static const struct idle_cpu idle_cpu_dnv = { .state_table = dnv_cstates, .disable_promotion_to_c1e = true, + .use_acpi = true, }; static const struct x86_cpu_id intel_idle_ids[] __initconst = { - INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nehalem), + INTEL_CPU_FAM6(NEHALEM_EP, idle_cpu_nhx), INTEL_CPU_FAM6(NEHALEM, idle_cpu_nehalem), INTEL_CPU_FAM6(NEHALEM_G, idle_cpu_nehalem), INTEL_CPU_FAM6(WESTMERE, idle_cpu_nehalem), - INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nehalem), - INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EP, idle_cpu_nhx), + INTEL_CPU_FAM6(NEHALEM_EX, idle_cpu_nhx), INTEL_CPU_FAM6(ATOM_BONNELL, idle_cpu_atom), INTEL_CPU_FAM6(ATOM_BONNELL_MID, idle_cpu_lincroft), - INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nehalem), + INTEL_CPU_FAM6(WESTMERE_EX, idle_cpu_nhx), INTEL_CPU_FAM6(SANDYBRIDGE, idle_cpu_snb), - INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snb), + INTEL_CPU_FAM6(SANDYBRIDGE_X, idle_cpu_snx), INTEL_CPU_FAM6(ATOM_SALTWELL, idle_cpu_atom), INTEL_CPU_FAM6(ATOM_SILVERMONT, idle_cpu_byt), INTEL_CPU_FAM6(ATOM_SILVERMONT_MID, idle_cpu_tangier), @@ -1096,14 +1126,14 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { INTEL_CPU_FAM6(IVYBRIDGE, idle_cpu_ivb), INTEL_CPU_FAM6(IVYBRIDGE_X, idle_cpu_ivt), INTEL_CPU_FAM6(HASWELL, idle_cpu_hsw), - INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsw), + INTEL_CPU_FAM6(HASWELL_X, idle_cpu_hsx), INTEL_CPU_FAM6(HASWELL_L, idle_cpu_hsw), INTEL_CPU_FAM6(HASWELL_G, idle_cpu_hsw), INTEL_CPU_FAM6(ATOM_SILVERMONT_D, idle_cpu_avn), INTEL_CPU_FAM6(BROADWELL, idle_cpu_bdw), INTEL_CPU_FAM6(BROADWELL_G, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdw), - INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdw), + INTEL_CPU_FAM6(BROADWELL_X, idle_cpu_bdx), + INTEL_CPU_FAM6(BROADWELL_D, idle_cpu_bdx), INTEL_CPU_FAM6(SKYLAKE_L, idle_cpu_skl), INTEL_CPU_FAM6(SKYLAKE, idle_cpu_skl), INTEL_CPU_FAM6(KABYLAKE_L, idle_cpu_skl), -- cgit From 32ad73db7fc5fe7eebafdab3b528f99ab8498e3f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 20 Dec 2019 20:44:55 -0800 Subject: x86/msr-index: Clean up bit defines for IA32_FEATURE_CONTROL MSR As pointed out by Boris, the defines for bits in IA32_FEATURE_CONTROL are quite a mouthful, especially the VMX bits which must differentiate between enabling VMX inside and outside SMX (TXT) operation. Rename the MSR and its bit defines to abbreviate FEATURE_CONTROL as FEAT_CTL to make them a little friendlier on the eyes. Arguably, the MSR itself should keep the full IA32_FEATURE_CONTROL name to match Intel's SDM, but a future patch will add a dedicated Kconfig, file and functions for the MSR. Using the full name for those assets is rather unwieldy, so bite the bullet and use IA32_FEAT_CTL so that its nomenclature is consistent throughout the kernel. Opportunistically, fix a few other annoyances with the defines: - Relocate the bit defines so that they immediately follow the MSR define, e.g. aren't mistaken as belonging to MISC_FEATURE_CONTROL. - Add whitespace around the block of feature control defines to make it clear they're all related. - Use BIT() instead of manually encoding the bit shift. - Use "VMX" instead of "VMXON" to match the SDM. - Append "_ENABLED" to the LMCE (Local Machine Check Exception) bit to be consistent with the kernel's verbiage used for all other feature control bits. Note, the SDM refers to the LMCE bit as LMCE_ON, likely to differentiate it from IA32_MCG_EXT_CTL.LMCE_EN. Ignore the (literal) one-off usage of _ON, the SDM is simply "wrong". Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20191221044513.21680-2-sean.j.christopherson@intel.com --- arch/x86/include/asm/msr-index.h | 14 ++++++------ arch/x86/kernel/cpu/mce/intel.c | 10 ++++----- arch/x86/kvm/vmx/nested.c | 4 ++-- arch/x86/kvm/vmx/vmx.c | 46 ++++++++++++++++++++-------------------- arch/x86/kvm/vmx/vmx.h | 2 +- arch/x86/kvm/x86.c | 2 +- drivers/idle/intel_idle.c | 2 +- 7 files changed, 41 insertions(+), 39 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 084e98da04a7..ebe1685e92dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -558,7 +558,14 @@ #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c #define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +/* Referred to as IA32_FEATURE_CONTROL in Intel's SDM. */ +#define MSR_IA32_FEAT_CTL 0x0000003a +#define FEAT_CTL_LOCKED BIT(0) +#define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) +#define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_LMCE_ENABLED BIT(20) + #define MSR_IA32_TSC_ADJUST 0x0000003b #define MSR_IA32_BNDCFGS 0x00000d90 @@ -566,11 +573,6 @@ #define MSR_IA32_XSS 0x00000da0 -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - #define MSR_IA32_APICBASE 0x0000001b #define MSR_IA32_APICBASE_BSP (1<<8) #define MSR_IA32_APICBASE_ENABLE (1<<11) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index e270d0770134..c238518b84a2 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -115,12 +115,12 @@ static bool lmce_supported(void) /* * BIOS should indicate support for LMCE by setting bit 20 in - * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will - * generate a #GP fault. + * IA32_FEAT_CTL without which touching MCG_EXT_CTL will generate a #GP + * fault. */ - rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp); - if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) == - (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) + rdmsrl(MSR_IA32_FEAT_CTL, tmp); + if ((tmp & (FEAT_CTL_LOCKED | FEAT_CTL_LMCE_ENABLED)) == + (FEAT_CTL_LOCKED | FEAT_CTL_LMCE_ENABLED)) return true; return false; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 4aea7d304beb..6879966b7648 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4588,8 +4588,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu) gpa_t vmptr; uint32_t revision; struct vcpu_vmx *vmx = to_vmx(vcpu); - const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED - | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED + | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* * The Intel VMX Instruction Reference lists a bunch of bits that are diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e3394c839dea..91b2517e3b62 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1839,11 +1839,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if (!msr_info->host_initiated && !(vmx->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) + FEAT_CTL_LMCE_ENABLED)) return 1; msr_info->data = vcpu->arch.mcg_ext_ctl; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: msr_info->data = vmx->msr_ia32_feature_control; break; case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: @@ -2074,15 +2074,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_MCG_EXT_CTL: if ((!msr_info->host_initiated && !(to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LMCE)) || + FEAT_CTL_LMCE_ENABLED)) || (data & ~MCG_EXT_CTL_LMCE_EN)) return 1; vcpu->arch.mcg_ext_ctl = data; break; - case MSR_IA32_FEATURE_CONTROL: + case MSR_IA32_FEAT_CTL: if (!vmx_feature_control_msr_valid(vcpu, data) || (to_vmx(vcpu)->msr_ia32_feature_control & - FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + FEAT_CTL_LOCKED && !msr_info->host_initiated)) return 1; vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) @@ -2206,22 +2206,22 @@ static __init int vmx_disabled_by_bios(void) { u64 msr; - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - if (msr & FEATURE_CONTROL_LOCKED) { + rdmsrl(MSR_IA32_FEAT_CTL, msr); + if (msr & FEAT_CTL_LOCKED) { /* launched w/ TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) + if (!(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX) && tboot_enabled()) return 1; /* launched w/o TXT and VMX only enabled w/ TXT */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) - && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) + if (!(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX) + && (msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX) && !tboot_enabled()) { printk(KERN_WARNING "kvm: disable TXT in the BIOS or " "activate TXT before enabling KVM\n"); return 1; } /* launched w/o TXT and VMX disabled */ - if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) + if (!(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX) && !tboot_enabled()) return 1; } @@ -2269,16 +2269,16 @@ static int hardware_enable(void) */ crash_enable_local_vmclear(cpu); - rdmsrl(MSR_IA32_FEATURE_CONTROL, old); + rdmsrl(MSR_IA32_FEAT_CTL, old); - test_bits = FEATURE_CONTROL_LOCKED; - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + test_bits = FEAT_CTL_LOCKED; + test_bits |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; if (tboot_enabled()) - test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX; + test_bits |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; if ((old & test_bits) != test_bits) { /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); + wrmsrl(MSR_IA32_FEAT_CTL, old | test_bits); } kvm_cpu_vmxon(phys_addr); if (enable_ept) @@ -6801,7 +6801,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; - vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; + vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR @@ -7099,12 +7099,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (nested_vmx_allowed(vcpu)) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX | - FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX); + ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX | + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX); if (nested_vmx_allowed(vcpu)) { nested_vmx_cr_fixed1_bits_update(vcpu); @@ -7523,10 +7523,10 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |= - FEATURE_CONTROL_LMCE; + FEAT_CTL_LMCE_ENABLED; else to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= - ~FEATURE_CONTROL_LMCE; + ~FEAT_CTL_LMCE_ENABLED; } static int vmx_smi_allowed(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a4f7f737c5d4..7f42cf3dcd70 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -289,7 +289,7 @@ struct vcpu_vmx { /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in - * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included + * msr_ia32_feature_control. FEAT_CTL_LOCKED is always included * in msr_ia32_feature_control_valid_bits. */ u64 msr_ia32_feature_control; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf917139de6b..740d3ee42455 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1142,7 +1142,7 @@ static const u32 msrs_to_save_all[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, + MSR_IA32_FEAT_CTL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 75fd2a7b0842..5a44cd8142e9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1284,7 +1284,7 @@ static void sklh_idle_state_table_update(void) /* if SGX is present */ if (ebx & (1 << 2)) { - rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); + rdmsrl(MSR_IA32_FEAT_CTL, msr); /* if SGX is enabled */ if (msr & (1 << 18)) -- cgit From cbd2c4c25d7e0ba7afe42f2397865b164d1109eb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:43:23 +0100 Subject: intel_idle: Eliminate __setup_broadcast_timer() The __setup_broadcast_timer() static function is only called in one place and "true" is passed to it as the argument in there, so effectively it is a wrapper arround tick_broadcast_enable(). To simplify the code, call tick_broadcast_enable() directly instead of __setup_broadcast_timer() and drop the latter. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 1467490adfc3..33e353bdd4b6 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -967,14 +967,6 @@ static bool intel_idle_verify_cstate(unsigned int mwait_hint) return true; } -static void __setup_broadcast_timer(bool on) -{ - if (on) - tick_broadcast_enable(); - else - tick_broadcast_disable(); -} - static void auto_demotion_disable(void) { unsigned long long msr_bits; @@ -1624,7 +1616,7 @@ static int intel_idle_cpu_online(unsigned int cpu) struct cpuidle_device *dev; if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) - __setup_broadcast_timer(true); + tick_broadcast_enable(); /* * Some systems can hotplug a cpu at runtime after -- cgit From a6c86e336282c67a0b04f64b3b5574794249e3e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:44:58 +0100 Subject: intel_idle: Fold intel_idle_probe() into intel_idle_init() There is no particular reason why intel_idle_probe() needs to be a separate function and folding it into intel_idle_init() causes the code to be somewhat easier to follow, so do just that. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 97 ++++++++++++++++++++--------------------------- 1 file changed, 42 insertions(+), 55 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 33e353bdd4b6..690309c78dfb 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1296,58 +1296,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -/* - * intel_idle_probe() - */ -static int __init intel_idle_probe(void) -{ - unsigned int eax, ebx, ecx; - const struct x86_cpu_id *id; - - if (max_cstate == 0) { - pr_debug("disabled\n"); - return -EPERM; - } - - id = x86_match_cpu(intel_idle_ids); - if (id) { - if (!boot_cpu_has(X86_FEATURE_MWAIT)) { - pr_debug("Please enable MWAIT in BIOS SETUP\n"); - return -ENODEV; - } - } else { - id = x86_match_cpu(intel_mwait_ids); - if (!id) - return -ENODEV; - } - - if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) - return -ENODEV; - - cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); - - if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || - !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || - !mwait_substates) - return -ENODEV; - - pr_debug("MWAIT substates: 0x%x\n", mwait_substates); - - icpu = (const struct idle_cpu *)id->driver_data; - if (icpu) { - cpuidle_state_table = icpu->state_table; - if (icpu->use_acpi) - intel_idle_acpi_cst_extract(); - } else if (!intel_idle_acpi_cst_extract()) { - return -ENODEV; - } - - pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", - boot_cpu_data.x86_model); - - return 0; -} - /* * intel_idle_cpuidle_devices_uninit() * Unregisters the cpuidle devices. @@ -1632,15 +1580,54 @@ static int intel_idle_cpu_online(unsigned int cpu) static int __init intel_idle_init(void) { + const struct x86_cpu_id *id; + unsigned int eax, ebx, ecx; int retval; /* Do not load intel_idle at all for now if idle= is passed */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) return -ENODEV; - retval = intel_idle_probe(); - if (retval) - return retval; + if (max_cstate == 0) { + pr_debug("disabled\n"); + return -EPERM; + } + + id = x86_match_cpu(intel_idle_ids); + if (id) { + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_debug("Please enable MWAIT in BIOS SETUP\n"); + return -ENODEV; + } + } else { + id = x86_match_cpu(intel_mwait_ids); + if (!id) + return -ENODEV; + } + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return -ENODEV; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK) || + !mwait_substates) + return -ENODEV; + + pr_debug("MWAIT substates: 0x%x\n", mwait_substates); + + icpu = (const struct idle_cpu *)id->driver_data; + if (icpu) { + cpuidle_state_table = icpu->state_table; + if (icpu->use_acpi) + intel_idle_acpi_cst_extract(); + } else if (!intel_idle_acpi_cst_extract()) { + return -ENODEV; + } + + pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n", + boot_cpu_data.x86_model); intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); if (intel_idle_cpuidle_devices == NULL) -- cgit From 533da74a8c8d75af7958f4aa1abc944f061d1d85 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:45:49 +0100 Subject: intel_idle: Clean up NULL pointer check in intel_idle_init() Instead of comparing intel_idle_cpuidle_devices with NULL apply the "!" (not) operator to it when checking it against NULL. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 690309c78dfb..fddb72e5f6fa 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1630,7 +1630,7 @@ static int __init intel_idle_init(void) boot_cpu_data.x86_model); intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device); - if (intel_idle_cpuidle_devices == NULL) + if (!intel_idle_cpuidle_devices) return -ENOMEM; intel_idle_cpuidle_driver_init(); -- cgit From 3d3a1ae9b4bedc3e8c88ff4baeb0a195cb447fa1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:48:25 +0100 Subject: intel_idle: Rearrange intel_idle_cpuidle_driver_init() Notice that intel_idle_state_table_update() only needs to be called if icpu is not NULL, so fold it into intel_idle_init_cstates_icpu(), and pass a pointer to the driver object to intel_idle_cpuidle_driver_init() as an argument instead of referencing it locally in there. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index fddb72e5f6fa..c535b0b55ea5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1447,16 +1447,12 @@ static void sklh_idle_state_table_update(void) skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE; /* C8-SKL */ skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } -/* - * intel_idle_state_table_update() - * - * Update the default state_table for this CPU-id - */ -static void intel_idle_state_table_update(void) +static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { - switch (boot_cpu_data.x86_model) { + int cstate; + switch (boot_cpu_data.x86_model) { case INTEL_FAM6_IVYBRIDGE_X: ivt_idle_state_table_update(); break; @@ -1468,11 +1464,6 @@ static void intel_idle_state_table_update(void) sklh_idle_state_table_update(); break; } -} - -static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) -{ - int cstate; for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) { unsigned int mwait_hint; @@ -1515,12 +1506,8 @@ static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) * intel_idle_cpuidle_driver_init() * allocate, initialize cpuidle_states */ -static void __init intel_idle_cpuidle_driver_init(void) +static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) { - struct cpuidle_driver *drv = &intel_idle_driver; - - intel_idle_state_table_update(); - cpuidle_poll_state_init(drv); drv->state_count = 1; @@ -1633,7 +1620,8 @@ static int __init intel_idle_init(void) if (!intel_idle_cpuidle_devices) return -ENOMEM; - intel_idle_cpuidle_driver_init(); + intel_idle_cpuidle_driver_init(&intel_idle_driver); + retval = cpuidle_register_driver(&intel_idle_driver); if (retval) { struct cpuidle_driver *drv = cpuidle_get_driver(); -- cgit From 0755a9bd9963ec3d611129cf2cdbb3e041ccbd6a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:49:58 +0100 Subject: intel_idle: Move and clean up intel_idle_cpuidle_devices_uninit() Move intel_idle_cpuidle_devices_uninit() closer to its caller, intel_idle_init(), add the __init modifier to its header, drop a redundant local variable from it and fix up its kerneldoc comment. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index c535b0b55ea5..9841046dfd32 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1296,21 +1296,6 @@ static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } #endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ -/* - * intel_idle_cpuidle_devices_uninit() - * Unregisters the cpuidle devices. - */ -static void intel_idle_cpuidle_devices_uninit(void) -{ - int i; - struct cpuidle_device *dev; - - for_each_online_cpu(i) { - dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); - cpuidle_unregister_device(dev); - } -} - /* * ivt_idle_state_table_update(void) * @@ -1565,6 +1550,17 @@ static int intel_idle_cpu_online(unsigned int cpu) return 0; } +/** + * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices. + */ +static void __init intel_idle_cpuidle_devices_uninit(void) +{ + int i; + + for_each_online_cpu(i) + cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i)); +} + static int __init intel_idle_init(void) { const struct x86_cpu_id *id; -- cgit From 095928ae484b9f765fb4967c6cf76a2f86fc9787 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:51:22 +0100 Subject: intel_idle: Annotate initialization code and data structures Annotate the functions that are only used at the initialization time with __init and the data structures used by them with __initdata or __initconst. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 9841046dfd32..b796eaea8715 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1148,7 +1148,7 @@ static const struct x86_cpu_id intel_mwait_ids[] __initconst = { {} }; -static bool intel_idle_max_cstate_reached(int cstate) +static bool __init intel_idle_max_cstate_reached(int cstate) { if (cstate + 1 > max_cstate) { pr_info("max_cstate %d reached\n", max_cstate); @@ -1164,7 +1164,7 @@ static bool no_acpi __read_mostly; module_param(no_acpi, bool, 0444); MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); -static struct acpi_processor_power acpi_state_table; +static struct acpi_processor_power acpi_state_table __initdata; /** * intel_idle_cst_usable - Check if the _CST information can be used. @@ -1172,7 +1172,7 @@ static struct acpi_processor_power acpi_state_table; * Check if all of the C-states listed by _CST in the max_cstate range are * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. */ -static bool intel_idle_cst_usable(void) +static bool __init intel_idle_cst_usable(void) { int cstate, limit; @@ -1189,7 +1189,7 @@ static bool intel_idle_cst_usable(void) return true; } -static bool intel_idle_acpi_cst_extract(void) +static bool __init intel_idle_acpi_cst_extract(void) { unsigned int cpu; @@ -1224,7 +1224,7 @@ static bool intel_idle_acpi_cst_extract(void) return false; } -static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) +static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); @@ -1268,7 +1268,7 @@ static void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) } } -static bool intel_idle_off_by_default(u32 mwait_hint) +static bool __init intel_idle_off_by_default(u32 mwait_hint) { int cstate, limit; @@ -1302,7 +1302,7 @@ static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } * Tune IVT multi-socket targets * Assumption: num_sockets == (max_package_num + 1) */ -static void ivt_idle_state_table_update(void) +static void __init ivt_idle_state_table_update(void) { /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ int cpu, package_num, num_sockets = 1; @@ -1329,10 +1329,11 @@ static void ivt_idle_state_table_update(void) * Translate IRTL (Interrupt Response Time Limit) MSR to usec */ -static unsigned int irtl_ns_units[] = { - 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; +static const unsigned int irtl_ns_units[] __initconst = { + 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 +}; -static unsigned long long irtl_2_usec(unsigned long long irtl) +static unsigned long long __init irtl_2_usec(unsigned long long irtl) { unsigned long long ns; @@ -1349,7 +1350,7 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) * On BXT, we trust the IRTL to show the definitive maximum latency * We use the same value for target_residency. */ -static void bxt_idle_state_table_update(void) +static void __init bxt_idle_state_table_update(void) { unsigned long long msr; unsigned int usec; @@ -1396,7 +1397,7 @@ static void bxt_idle_state_table_update(void) * On SKL-H (model 0x5e) disable C8 and C9 if: * C10 is enabled and SGX disabled */ -static void sklh_idle_state_table_update(void) +static void __init sklh_idle_state_table_update(void) { unsigned long long msr; unsigned int eax, ebx, ecx, edx; @@ -1433,7 +1434,7 @@ static void sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } -static void intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) +static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; -- cgit From 1aefbd7aeb7695c1a9e6ece9a1612ee88e1ea3f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 10 Jan 2020 11:52:32 +0100 Subject: intel_idle: Move 3 functions closer to their callers Move intel_idle_verify_cstate(), auto_demotion_disable() and c1e_promotion_disable() closer to their callers. While at it, annotate intel_idle_verify_cstate() with __init, as it is only used during the initialization of the driver. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 67 ++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b796eaea8715..27c50fba26d5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -951,39 +951,6 @@ static void intel_idle_s2idle(struct cpuidle_device *dev, mwait_idle_with_hints(eax, ecx); } -static bool intel_idle_verify_cstate(unsigned int mwait_hint) -{ - unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; - unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & - MWAIT_SUBSTATE_MASK; - - /* Ignore the C-state if there are NO sub-states in CPUID for it. */ - if (num_substates == 0) - return false; - - if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) - mark_tsc_unstable("TSC halts in idle states deeper than C2"); - - return true; -} - -static void auto_demotion_disable(void) -{ - unsigned long long msr_bits; - - rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); - msr_bits &= ~(icpu->auto_demotion_disable_flags); - wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); -} -static void c1e_promotion_disable(void) -{ - unsigned long long msr_bits; - - rdmsrl(MSR_IA32_POWER_CTL, msr_bits); - msr_bits &= ~0x2; - wrmsrl(MSR_IA32_POWER_CTL, msr_bits); -} - static const struct idle_cpu idle_cpu_nehalem = { .state_table = nehalem_cstates, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, @@ -1434,6 +1401,22 @@ static void __init sklh_idle_state_table_update(void) skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE; /* C9-SKL */ } +static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) +{ + unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & + MWAIT_SUBSTATE_MASK; + + /* Ignore the C-state if there are NO sub-states in CPUID for it. */ + if (num_substates == 0) + return false; + + if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle states deeper than C2"); + + return true; +} + static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) { int cstate; @@ -1503,6 +1486,24 @@ static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) intel_idle_init_cstates_acpi(drv); } +static void auto_demotion_disable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); + msr_bits &= ~(icpu->auto_demotion_disable_flags); + wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits); +} + +static void c1e_promotion_disable(void) +{ + unsigned long long msr_bits; + + rdmsrl(MSR_IA32_POWER_CTL, msr_bits); + msr_bits &= ~0x2; + wrmsrl(MSR_IA32_POWER_CTL, msr_bits); +} + /* * intel_idle_cpu_init() * allocate, initialize, register cpuidle_devices -- cgit From 86e9466ae620824dee861f0f6ed933fcd0b449f3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Jan 2020 11:46:24 +0100 Subject: intel_idle: Clean up irtl_2_usec() Move the irtl_ns_units[] definition into irtl_2_usec() which is the only user of it, use div_u64() for the division in there (as the divisor is small enough) and use the NSEC_PER_USEC symbol for the divisor. Also convert the irtl_2_usec() comment to a proper kerneldo one. No intentional functional impact. Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 27c50fba26d5..9f38ff02a7b8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1292,16 +1292,17 @@ static void __init ivt_idle_state_table_update(void) /* else, 1 and 2 socket systems use default ivt_cstates */ } -/* - * Translate IRTL (Interrupt Response Time Limit) MSR to usec +/** + * irtl_2_usec - IRTL to microseconds conversion. + * @irtl: IRTL MSR value. + * + * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. */ - -static const unsigned int irtl_ns_units[] __initconst = { - 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 -}; - static unsigned long long __init irtl_2_usec(unsigned long long irtl) { + static const unsigned int irtl_ns_units[] __initconst = { + 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 + }; unsigned long long ns; if (!irtl) @@ -1309,8 +1310,9 @@ static unsigned long long __init irtl_2_usec(unsigned long long irtl) ns = irtl_ns_units[(irtl >> 10) & 0x7]; - return div64_u64((irtl & 0x3FF) * ns, 1000); + return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); } + /* * bxt_idle_state_table_update(void) * -- cgit From 3a5be9b8f43346a24f31c0017cb2566a6b2c72c5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2020 11:57:08 +0100 Subject: intel_idle: Introduce 'use_acpi' module parameter For diagnostics, it is generally useful to be able to make intel_idle take the system's ACPI tables into consideration even if that is not required for the processor model in there, so introduce a new module parameter, 'use_acpi', to make that happen and update the documentation to cover it. While at it, fix the 'no_acpi' module parameter name in the documentation. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_idle.rst | 13 +++++++++---- drivers/idle/intel_idle.c | 11 +++++++++-- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst index afbf778035f8..8998598746a4 100644 --- a/Documentation/admin-guide/pm/intel_idle.rst +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -60,6 +60,9 @@ of the system. The former are always used if the processor model at hand is recognized by ``intel_idle`` and the latter are used if that is required for the given processor model (which is the case for all server processor models recognized by ``intel_idle``) or if the processor model is not recognized. +[There is a module parameter that can be used to make the driver use the ACPI +tables with any processor model recognized by it; see +`below `_.] If the ACPI tables are going to be used for building the list of available idle states, ``intel_idle`` first looks for a ``_CST`` object under one of the ACPI @@ -165,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ``MWAIT`` instruction is not allowed to be used, so the initialization of ``intel_idle`` will fail. -Apart from that there are two module parameters recognized by ``intel_idle`` +Apart from that there are three module parameters recognized by ``intel_idle`` itself that can be set via the kernel command line (they cannot be updated via sysfs, so that is the only way to change their values). @@ -186,9 +189,11 @@ QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`). Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail. -The ``noacpi`` module parameter (which is recognized by ``intel_idle`` if the -kernel has been configured with ACPI support), can be set to make the driver -ignore the system's ACPI tables entirely (it is unset by default). +The ``no_acpi`` and ``use_acpi`` module parameters (recognized by ``intel_idle`` +if the kernel has been configured with ACPI support) can be set to make the +driver ignore the system's ACPI tables entirely or use them for all of the +recognized processor models, respectively (they both are unset by default and +``use_acpi`` has no effect if ``no_acpi`` is set). .. _intel-idle-core-and-package-idle-states: diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 7833e650789f..6fbd94f85fa5 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1131,6 +1131,10 @@ static bool no_acpi __read_mostly; module_param(no_acpi, bool, 0444); MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list"); +static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set. */ +module_param_named(use_acpi, force_use_acpi, bool, 0444); +MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); + static struct acpi_processor_power acpi_state_table __initdata; /** @@ -1258,6 +1262,8 @@ static bool __init intel_idle_off_by_default(u32 mwait_hint) return true; } #else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ +#define force_use_acpi (false) + static inline bool intel_idle_acpi_cst_extract(void) { return false; } static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } @@ -1460,7 +1466,8 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; - if (icpu->use_acpi && intel_idle_off_by_default(mwait_hint) && + if ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; @@ -1607,7 +1614,7 @@ static int __init intel_idle_init(void) icpu = (const struct idle_cpu *)id->driver_data; if (icpu) { cpuidle_state_table = icpu->state_table; - if (icpu->use_acpi) + if (icpu->use_acpi || force_use_acpi) intel_idle_acpi_cst_extract(); } else if (!intel_idle_acpi_cst_extract()) { return -ENODEV; -- cgit From 4dcb78ee579cdf90e30c5a0223f6f160ea37182d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 3 Feb 2020 11:57:18 +0100 Subject: intel_idle: Introduce 'states_off' module parameter In certain system configurations it may not be desirable to use some C-states assumed to be available by intel_idle and the driver needs to be prevented from using them even before the cpuidle sysfs interface becomes accessible to user space. Currently, the only way to achieve that is by setting the 'max_cstate' module parameter to a value lower than the index of the shallowest of the C-states in question, but that may be overly intrusive, because it effectively makes all of the idle states deeper than the 'max_cstate' one go away (and the C-state to avoid may be in the middle of the range normally regarded as available). To allow that limitation to be overcome, introduce a new module parameter called 'states_off' to represent a list of idle states to be disabled by default in the form of a bitmask and update the documentation to cover it. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_idle.rst | 19 ++++++++++++++++++- drivers/idle/intel_idle.c | 23 ++++++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'drivers/idle/intel_idle.c') diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst index 8998598746a4..89309e1b0e48 100644 --- a/Documentation/admin-guide/pm/intel_idle.rst +++ b/Documentation/admin-guide/pm/intel_idle.rst @@ -168,7 +168,7 @@ and ``idle=nomwait``. If any of them is present in the kernel command line, the ``MWAIT`` instruction is not allowed to be used, so the initialization of ``intel_idle`` will fail. -Apart from that there are three module parameters recognized by ``intel_idle`` +Apart from that there are four module parameters recognized by ``intel_idle`` itself that can be set via the kernel command line (they cannot be updated via sysfs, so that is the only way to change their values). @@ -195,6 +195,23 @@ driver ignore the system's ACPI tables entirely or use them for all of the recognized processor models, respectively (they both are unset by default and ``use_acpi`` has no effect if ``no_acpi`` is set). +The value of the ``states_off`` module parameter (0 by default) represents a +list of idle states to be disabled by default in the form of a bitmask. + +Namely, the positions of the bits that are set in the ``states_off`` value are +the indices of idle states to be disabled by default (as reflected by the names +of the corresponding idle state directories in ``sysfs``, :file:`state0`, +:file:`state1` ... :file:`state` ..., where ```` is the index of the given +idle state; see :ref:`idle-states-representation` in :doc:`cpuidle`). + +For example, if ``states_off`` is equal to 3, the driver will disable idle +states 0 and 1 by default, and if it is equal to 8, idle state 3 will be +disabled by default and so on (bit positions beyond the maximum idle state index +are ignored). + +The idle states disabled this way can be enabled (on a per-CPU basis) from user +space via ``sysfs``. + .. _intel-idle-core-and-package-idle-states: diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 6fbd94f85fa5..d55606608ac8 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -63,6 +63,7 @@ static struct cpuidle_driver intel_idle_driver = { }; /* intel_idle.max_cstate=0 disables driver */ static int max_cstate = CPUIDLE_STATE_MAX - 1; +static unsigned int disabled_states_mask; static unsigned int mwait_substates; @@ -1234,6 +1235,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (cx->type > ACPI_STATE_C2) state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; + if (disabled_states_mask & BIT(cstate)) + state->flags |= CPUIDLE_FLAG_OFF; + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } @@ -1466,9 +1470,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) /* Structure copy. */ drv->states[drv->state_count] = cpuidle_state_table[cstate]; - if ((icpu->use_acpi || force_use_acpi) && - intel_idle_off_by_default(mwait_hint) && - !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)) + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && + !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE))) drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF; drv->state_count++; @@ -1487,6 +1492,10 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv) { cpuidle_poll_state_init(drv); + + if (disabled_states_mask & BIT(0)) + drv->states[0].flags |= CPUIDLE_FLAG_OFF; + drv->state_count = 1; if (icpu) @@ -1667,3 +1676,11 @@ device_initcall(intel_idle_init); * is the easiest way (currently) to continue doing that. */ module_param(max_cstate, int, 0444); +/* + * The positions of the bits that are set in this number are the indices of the + * idle states to be disabled by default (as reflected by the names of the + * corresponding idle state directories in sysfs, "state0", "state1" ... + * "state" ..., where is the index of the given state). + */ +module_param_named(states_off, disabled_states_mask, uint, 0444); +MODULE_PARM_DESC(states_off, "Mask of disabled idle states"); -- cgit