From 496d0a648509bce665a85e19a871375dfe4c8f2e Mon Sep 17 00:00:00 2001 From: Parshuram Sangle Date: Thu, 11 Jan 2024 19:29:50 +0530 Subject: cpuidle: haltpoll: do not shrink guest poll_limit_ns below grow_start While adjusting guest halt poll limit, grow block starts at guest_halt_poll_grow_start without taking intermediate values. Similar behavior is expected while shrinking the value. This avoids short interval values which are really not required. VCPU1 trace (guest_halt_poll_shrink equals 2): VCPU1 grow 10000 VCPU1 shrink 5000 VCPU1 shrink 2500 VCPU1 shrink 1250 VCPU1 shrink 625 VCPU1 shrink 312 VCPU1 shrink 156 VCPU1 shrink 78 VCPU1 shrink 39 VCPU1 shrink 19 VCPU1 shrink 9 VCPU1 shrink 4 Similar change is done in KVM halt poll flow with below patch: Link: https://lore.kernel.org/kvm/20211006133021.271905-3-sashal@kernel.org/ Co-developed-by: Rajendran Jaishankar Signed-off-by: Rajendran Jaishankar Signed-off-by: Parshuram Sangle Reviewed-by: Marcelo Tosatti [ rjw: Subject edits ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/haltpoll.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 1dff3a52917d..663b7f164d20 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -98,10 +98,15 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns) unsigned int shrink = guest_halt_poll_shrink; val = dev->poll_limit_ns; - if (shrink == 0) + if (shrink == 0) { val = 0; - else + } else { val /= shrink; + /* Reset value to 0 if shrunk below grow_start */ + if (val < guest_halt_poll_grow_start) + val = 0; + } + trace_guest_halt_poll_ns_shrink(val, dev->poll_limit_ns); dev->poll_limit_ns = val; } -- cgit From 88390dd788db485912ee7f9a8d3d56fc5265d52f Mon Sep 17 00:00:00 2001 From: C Cheng Date: Tue, 19 Dec 2023 11:14:42 +0800 Subject: cpuidle: Avoid potential overflow in integer multiplication In detail: In C language, when you perform a multiplication operation, if both operands are of int type, the multiplication operation is performed on the int type, and then the result is converted to the target type. This means that if the product of int type multiplication exceeds the range that int type can represent, an overflow will occur even if you store the result in a variable of int64_t type. For a multiplication of two int values, it is better to use mul_u32_u32() rather than s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC to avoid potential overflow happenning. Signed-off-by: C Cheng Signed-off-by: Bo Ye Reviewed-by: AngeloGioacchino Del Regno [ rjw: New subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index d9cda7f6ccb9..cf5873cc45dc 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -187,7 +188,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC); if (s->exit_latency > 0) - s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC); else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; else -- cgit From 6b8e288f49570ee2ba15a2a07c2ebf7ad2210422 Mon Sep 17 00:00:00 2001 From: He Rongguang Date: Mon, 4 Mar 2024 14:14:06 +0800 Subject: cpuidle: ACPI/intel: fix MWAIT hint target C-state computation According to x86 spec ([1] and [2]), MWAIT hint_address[7:4] plus 1 is the corresponding C-state, and 0xF means C0. ACPI C-state table usually only contains C1+, but nothing prevents ACPI firmware from presenting a C-state (maybe C1+) but using MWAIT address C0 (i.e., 0xF in ACPI FFH MWAIT hint address). And if this is the case, Linux erroneously treat this cstate as C16, while actually this should be valid C0 instead of C16, as per the specifications. Since ACPI firmware is out of Linux kernel scope, fix the kernel handling of 0xF ->(to) C0 in this situation. This is found when a tweaked ACPI C-state table is presented by Qemu to VM. Also modify the intel_idle case for code consistency. [1]. Intel SDM Vol 2, Table 4-11. MWAIT Hints Register (EAX): "Value of 0 means C1; 1 means C2 and so on Value of 01111B means C0". [2]. AMD manual Vol 3, MWAIT: "The processor C-state is EAX[7:4]+1, so to request C0 is to place the value F in EAX[7:4] and to request C1 is to place the value 0 in EAX[7:4].". Signed-off-by: He Rongguang [ rjw: Subject and changelog edits, whitespace fixups ] Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cstate.c | 4 ++-- drivers/idle/intel_idle.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 401808b47af3..f3ffd0a3a012 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -131,8 +131,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); /* Check whether this particular cx_type (in CST) is supported or not */ - cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) & - MWAIT_CSTATE_MASK) + 1; + cstate_type = (((cx->address >> MWAIT_SUBSTATE_SIZE) & + MWAIT_CSTATE_MASK) + 1) & MWAIT_CSTATE_MASK; edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index bcf1198e8991..e486027f8b07 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1934,7 +1934,8 @@ static void __init spr_idle_state_table_update(void) static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { - unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) & + MWAIT_CSTATE_MASK; unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & MWAIT_SUBSTATE_MASK; -- cgit