From 259231a045616c4101d023a8f4dcc8379af265a6 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:26 -0300 Subject: cpuidle: add poll_limit_ns to cpuidle_device structure Add a poll_limit_ns variable to the cpuidle_device structure. Calculate and configure it in the new cpuidle_poll_time function, in case it is zero. Individual governors are allowed to override this value. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb9a0db89f1a..b484dd69ec21 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -86,6 +86,7 @@ struct cpuidle_device { ktime_t next_hrtimer; int last_residency; + u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -132,6 +133,8 @@ extern int cpuidle_select(struct cpuidle_driver *drv, extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev); extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); @@ -166,6 +169,9 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; } -- cgit From 7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:27 -0300 Subject: governors: unify last_state_idx Since this field is shared by all governors, move it to the cpuidle_device structure. Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J.
Wysocki --- drivers/cpuidle/governors/ladder.c | 21 ++++++++++----------- drivers/cpuidle/governors/menu.c | 5 ++--- drivers/cpuidle/governors/teo.c | 12 ++++++------ include/linux/cpuidle.h | 1 + 4 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index f0dddc66af26..428eeb832fe7 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -38,7 +38,6 @@ struct ladder_device_state { struct ladder_device { struct ladder_device_state states[CPUIDLE_STATE_MAX]; - int last_state_idx; }; static DEFINE_PER_CPU(struct ladder_device, ladder_devices); @@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices); * @old_idx: the current state index * @new_idx: the new target state index */ -static inline void ladder_do_selection(struct ladder_device *ldev, +static inline void ladder_do_selection(struct cpuidle_device *dev, + struct ladder_device *ldev, int old_idx, int new_idx) { ldev->states[old_idx].stats.promotion_count = 0; ldev->states[old_idx].stats.demotion_count = 0; - ldev->last_state_idx = new_idx; + dev->last_state_idx = new_idx; } /** @@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv, { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; - int last_residency, last_idx = ldev->last_state_idx; + int last_residency, last_idx = dev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; int latency_req = cpuidle_governor_latency_req(dev->cpu); /* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { - ladder_do_selection(ldev, last_idx, 0); + ladder_do_selection(dev, ldev, last_idx, 0); return 0; } @@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { - ladder_do_selection(ldev, last_idx, last_idx + 1); + ladder_do_selection(dev, ldev, last_idx, last_idx + 1); return last_idx + 1; } } @@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, if (drv->states[i].exit_latency <= latency_req) break; } - ladder_do_selection(ldev, last_idx, i); + ladder_do_selection(dev, ldev, last_idx, i); return i; } @@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { - ladder_do_selection(ldev, last_idx, last_idx - 1); + ladder_do_selection(dev, ldev, last_idx, last_idx - 1); return last_idx - 1; } } @@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv, struct ladder_device_state *lstate; struct cpuidle_state *state; - ldev->last_state_idx = first_idx; + dev->last_state_idx = first_idx; for (i = first_idx; i < drv->state_count; i++) { state = &drv->states[i]; @@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv, */ static void ladder_reflect(struct cpuidle_device *dev, int index) { - struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); if (index > 0) - ldev->last_state_idx = index; + dev->last_state_idx = index; } static struct cpuidle_governor ladder_governor = { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 
e9a28c7846d6..dace4c7f830c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -117,7 +117,6 @@ */ struct menu_device { - int last_state_idx; int needs_update; int tick_wakeup; @@ -455,7 +454,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) { struct menu_device *data = this_cpu_ptr(&menu_devices); - data->last_state_idx = index; + dev->last_state_idx = index; data->needs_update = 1; data->tick_wakeup = tick_nohz_idle_got_tick(); } @@ -468,7 +467,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = this_cpu_ptr(&menu_devices); - int last_idx = data->last_state_idx; + int last_idx = dev->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; unsigned int new_factor; diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 7d05efdbd3c6..a2fd81067a13 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -96,7 +96,6 @@ struct teo_idle_state { * @time_span_ns: Time between idle state selection and post-wakeup update. * @sleep_length_ns: Time till the closest timer event (at the selection time). * @states: Idle states data corresponding to this CPU. - * @last_state: Idle state entered by the CPU last time. * @interval_idx: Index of the most recent saved idle interval. * @intervals: Saved idle duration values. */ @@ -104,7 +103,6 @@ struct teo_cpu { u64 time_span_ns; u64 sleep_length_ns; struct teo_idle_state states[CPUIDLE_STATE_MAX]; - int last_state; int interval_idx; unsigned int intervals[INTERVALS]; }; @@ -130,7 +128,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) */ measured_us = sleep_length_us; } else { - unsigned int lat = drv->states[cpu_data->last_state].exit_latency; + unsigned int lat; + + lat = drv->states[dev->last_state_idx].exit_latency; measured_us = ktime_to_us(cpu_data->time_span_ns); /* @@ -245,9 +245,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, int max_early_idx, idx, i; ktime_t delta_tick; - if (cpu_data->last_state >= 0) { + if (dev->last_state_idx >= 0) { teo_update(drv, dev); - cpu_data->last_state = -1; + dev->last_state_idx = -1; } cpu_data->time_span_ns = local_clock(); @@ -394,7 +394,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); - cpu_data->last_state = state; + dev->last_state_idx = state; /* * If the wakeup was not "natural", but triggered by one of the safety * nets, assume that the CPU might have been idle for the entire sleep diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b484dd69ec21..ba535a1a47d5 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -85,6 +85,7 @@ struct cpuidle_device { unsigned int cpu; ktime_t next_hrtimer; + int last_state_idx; int last_residency; u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; -- cgit From a1c4423b02b2121108e3ea9580741e0f26309a48 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 3 Jul 2019 20:51:29 -0300 Subject: cpuidle-haltpoll: disable host side polling when kvm virtualized When performing guest side polling, it is not necessary to also perform host side polling. So disable host side polling, via the new MSR interface, when loading cpuidle-haltpoll driver. 
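For orientation, the guest-side toggle this series adds boils down to a CPUID feature check plus a write to the new poll-control MSR. The following is a minimal illustrative sketch (not part of the patch below), assuming the usual KVM paravirt semantics where bit 0 of MSR_KVM_POLL_CONTROL set to 1 means the host keeps polling on HLT (the default) and 0 means it does not:

	/* Illustrative sketch only -- the real implementation is the
	 * arch_haltpoll_enable()/arch_haltpoll_disable() pair added below. */
	#include <linux/types.h>
	#include <asm/kvm_para.h>	/* KVM_FEATURE_POLL_CONTROL, MSR_KVM_POLL_CONTROL */
	#include <asm/msr.h>		/* wrmsrl() */

	static void guest_set_host_haltpoll(bool host_should_poll)
	{
		/* Old hosts do not expose the MSR; leave host-side polling alone. */
		if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
			return;

		/* Bit 0 = 1: host polls on HLT; bit 0 = 0: it does not (assumed semantics). */
		wrmsrl(MSR_KVM_POLL_CONTROL, host_should_poll ? 1 : 0);
	}
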
Signed-off-by: Marcelo Tosatti Signed-off-by: Rafael J. Wysocki --- arch/x86/Kconfig | 7 ++++++ arch/x86/include/asm/cpuidle_haltpoll.h | 8 +++++++ arch/x86/kernel/kvm.c | 42 +++++++++++++++++++++++++++++++++ drivers/cpuidle/cpuidle-haltpoll.c | 9 ++++++- include/linux/cpuidle_haltpoll.h | 16 +++++++++++++ 5 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/cpuidle_haltpoll.h create mode 100644 include/linux/cpuidle_haltpoll.h (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 222855cc0158..05e78acb187c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -794,6 +794,7 @@ config KVM_GUEST bool "KVM Guest support (including kvmclock)" depends on PARAVIRT select PARAVIRT_CLOCK + select ARCH_CPUIDLE_HALTPOLL default y ---help--- This option enables various optimizations for running under the KVM @@ -802,6 +803,12 @@ config KVM_GUEST underlying device model, the host provides the guest with timing infrastructure such as time of day, and system time +config ARCH_CPUIDLE_HALTPOLL + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help + If virtualized under KVM, disable host haltpoll. + config PVH bool "Support for running PVH guests" ---help--- diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 000000000000..ff8607d81526 --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(void); +void arch_haltpoll_disable(void); + +#endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b7f34fe2171e..f48401be8ce0 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -875,3 +875,45 @@ void __init kvm_spinlock_init(void) } #endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + +static void kvm_disable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +} + +static void kvm_enable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 1); +} + +void arch_haltpoll_enable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { + printk(KERN_ERR "kvm: host does not support poll control\n"); + printk(KERN_ERR "kvm: host upgrade recommended\n"); + return; + } + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_disable_host_haltpoll(NULL); + smp_call_function(kvm_disable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_enable); + +void arch_haltpoll_disable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + return; + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_enable_host_haltpoll(NULL); + smp_call_function(kvm_enable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_disable); +#endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 35cfb53e9287..9ac093dcbb01 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -15,6 +15,7 @@ #include #include #include +#include static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -47,6 +48,7 @@ static struct cpuidle_driver haltpoll_driver = { static int __init haltpoll_init(void) { + int ret; struct cpuidle_driver *drv = &haltpoll_driver; cpuidle_poll_state_init(drv); @@ -54,11 +56,16 @@ static int __init haltpoll_init(void) if 
(!kvm_para_available()) return 0; - return cpuidle_register(&haltpoll_driver, NULL); + ret = cpuidle_register(&haltpoll_driver, NULL); + if (ret == 0) + arch_haltpoll_enable(); + + return ret; } static void __exit haltpoll_exit(void) { + arch_haltpoll_disable(); cpuidle_unregister(&haltpoll_driver); } diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h new file mode 100644 index 000000000000..fe5954c2409e --- /dev/null +++ b/include/linux/cpuidle_haltpoll.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CPUIDLE_HALTPOLL_H +#define _CPUIDLE_HALTPOLL_H + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL +#include <asm/cpuidle_haltpoll.h> +#else +static inline void arch_haltpoll_enable(void) +{ +} + +static inline void arch_haltpoll_disable(void) +{ +} +#endif +#endif -- cgit From 7dcddef6f769d7e60691c732eb6d09cdb1d9df76 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 31 Jul 2019 15:29:52 +1000 Subject: cpuidle: header file stubs must be "static inline" An x86_64 allmodconfig build produces these errors: x86_64-linux-gnu-ld: kernel/sched/core.o: in function `cpuidle_poll_time': core.c:(.text+0x230): multiple definition of `cpuidle_poll_time'; arch/x86/kernel/process.o:process.c:(.text+0xc0): first defined here (and more) Fixes: 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure") Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index ba535a1a47d5..1a9f54eb3aa1 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -170,7 +170,7 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } -extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, +static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) -- cgit From 97d3eb9da84cae0548359b0aecb8619faad003b7 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Mon, 2 Sep 2019 11:40:31 +0100 Subject: cpuidle-haltpoll: vcpu hotplug support When cpus != maxcpus, cpuidle-haltpoll fails to register the vcpus past the online ones and thus fails to register the idle driver. This is because cpuidle_add_sysfs() will return -ENODEV as a consequence of get_cpu_device() returning no device for a non-existing CPU. Instead, switch to cpuidle_register_driver() and manually register each of the present cpus through cpuhp_setup_state() callbacks, as well as future ones that get onlined or offlined. This mimics the logic that intel_idle uses. Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver") Signed-off-by: Joao Martins Signed-off-by: Boris Ostrovsky Reviewed-by: Marcelo Tosatti Signed-off-by: Rafael J.
Wysocki --- arch/x86/include/asm/cpuidle_haltpoll.h | 4 +- arch/x86/kernel/kvm.c | 18 +++------ drivers/cpuidle/cpuidle-haltpoll.c | 68 ++++++++++++++++++++++++++++++--- include/linux/cpuidle_haltpoll.h | 4 +- 4 files changed, 73 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h index ff8607d81526..c8b39c6716ff 100644 --- a/arch/x86/include/asm/cpuidle_haltpoll.h +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -2,7 +2,7 @@ #ifndef _ARCH_HALTPOLL_H #define _ARCH_HALTPOLL_H -void arch_haltpoll_enable(void); -void arch_haltpoll_disable(void); +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu); #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f48401be8ce0..60bab4a3b36b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -888,32 +888,26 @@ static void kvm_enable_host_haltpoll(void *i) wrmsrl(MSR_KVM_POLL_CONTROL, 1); } -void arch_haltpoll_enable(void) +void arch_haltpoll_enable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { - printk(KERN_ERR "kvm: host does not support poll control\n"); - printk(KERN_ERR "kvm: host upgrade recommended\n"); + pr_err_once("kvm: host does not support poll control\n"); + pr_err_once("kvm: host upgrade recommended\n"); return; } - preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_disable_host_haltpoll(NULL); - smp_call_function(kvm_disable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_enable); -void arch_haltpoll_disable(void) +void arch_haltpoll_disable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) return; - preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_enable_host_haltpoll(NULL); - smp_call_function(kvm_enable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_disable); #endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 9ac093dcbb01..56d8ab814466 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -11,12 +11,16 @@ */ #include +#include #include #include #include #include #include +static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; +static enum cpuhp_state haltpoll_hp_state; + static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -46,6 +50,46 @@ static struct cpuidle_driver haltpoll_driver = { .state_count = 2, }; +static int haltpoll_cpu_online(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (!dev->registered) { + dev->cpu = cpu; + if (cpuidle_register_device(dev)) { + pr_notice("cpuidle_register_device %d failed!\n", cpu); + return -EIO; + } + arch_haltpoll_enable(cpu); + } + + return 0; +} + +static int haltpoll_cpu_offline(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (dev->registered) { + arch_haltpoll_disable(cpu); + cpuidle_unregister_device(dev); + } + + return 0; +} + +static void haltpoll_uninit(void) +{ + if (haltpoll_hp_state) + cpuhp_remove_state(haltpoll_hp_state); + cpuidle_unregister_driver(&haltpoll_driver); + + free_percpu(haltpoll_cpuidle_devices); + haltpoll_cpuidle_devices = NULL; +} + 
static int __init haltpoll_init(void) { int ret; @@ -56,17 +100,31 @@ static int __init haltpoll_init(void) if (!kvm_para_available()) return 0; - ret = cpuidle_register(&haltpoll_driver, NULL); - if (ret == 0) - arch_haltpoll_enable(); + ret = cpuidle_register_driver(drv); + if (ret < 0) + return ret; + + haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (haltpoll_cpuidle_devices == NULL) { + cpuidle_unregister_driver(drv); + return -ENOMEM; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online", + haltpoll_cpu_online, haltpoll_cpu_offline); + if (ret < 0) { + haltpoll_uninit(); + } else { + haltpoll_hp_state = ret; + ret = 0; + } return ret; } static void __exit haltpoll_exit(void) { - arch_haltpoll_disable(); - cpuidle_unregister(&haltpoll_driver); + haltpoll_uninit(); } module_init(haltpoll_init); diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h index fe5954c2409e..d50c1e0411a2 100644 --- a/include/linux/cpuidle_haltpoll.h +++ b/include/linux/cpuidle_haltpoll.h @@ -5,11 +5,11 @@ #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL #include <asm/cpuidle_haltpoll.h> #else -static inline void arch_haltpoll_enable(void) +static inline void arch_haltpoll_enable(unsigned int cpu) { } -static inline void arch_haltpoll_disable(void) +static inline void arch_haltpoll_disable(unsigned int cpu) { } #endif -- cgit From 82e430a6df7f0b5972c7fe717faffea823c6b84a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 2 Aug 2019 19:34:23 +0200 Subject: cpuidle: play_idle: Increase the resolution to usec The play_idle resolution is 1ms. The intel_powerclamp driver bases the idle duration on jiffies. The idle injection API also uses msec-based durations but has no user yet. Unfortunately, msec-based time does not fit well when we want to inject idle cycles precisely with shallow idle states. In order to set the scene for the incoming idle injection user, move the precision up to usec when calling play_idle. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J.
Wysocki --- drivers/powercap/idle_inject.c | 2 +- drivers/thermal/intel/intel_powerclamp.c | 2 +- include/linux/cpu.h | 2 +- kernel/sched/idle.c | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 24ff2a068978..10601f4bdf72 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -138,7 +138,7 @@ static void idle_inject_fn(unsigned int cpu) */ iit->should_run = 0; - play_idle(READ_ONCE(ii_dev->idle_duration_ms)); + play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC); } /** diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5149a817456b..53216dcbe173 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -430,7 +430,7 @@ static void clamp_idle_injection_func(struct kthread_work *work) if (should_skip) goto balance; - play_idle(jiffies_to_msecs(w_data->duration_jiffies)); + play_idle(jiffies_to_usecs(w_data->duration_jiffies)); balance: if (clamping && w_data->clamping && cpu_online(w_data->cpu)) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fcb1386bb0d4..88dc0c653925 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -179,7 +179,7 @@ void arch_cpu_idle_dead(void); int cpu_report_state(int cpu); int cpu_check_up_prepare(int cpu); void cpu_set_state_online(int cpu); -void play_idle(unsigned long duration_ms); +void play_idle(unsigned long duration_us); #ifdef CONFIG_HOTPLUG_CPU bool cpu_wait_death(unsigned int cpu, int seconds); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 80940939b733..b98283fc6914 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -311,7 +311,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -void play_idle(unsigned long duration_ms) +void play_idle(unsigned long duration_us) { struct idle_timer it; @@ -323,7 +323,7 @@ void play_idle(unsigned long duration_ms) WARN_ON_ONCE(current->nr_cpus_allowed != 1); WARN_ON_ONCE(!(current->flags & PF_KTHREAD)); WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY)); - WARN_ON_ONCE(!duration_ms); + WARN_ON_ONCE(!duration_us); rcu_sleep_check(); preempt_disable(); @@ -333,7 +333,8 @@ void play_idle(unsigned long duration_ms) it.done = 0; hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); it.timer.function = idle_inject_timer_fn; - hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED); + hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC), + HRTIMER_MODE_REL_PINNED); while (!READ_ONCE(it.done)) do_idle(); -- cgit From cd4c0763064f02f42824eed61be38eafdc702281 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 2 Aug 2019 19:34:24 +0200 Subject: powercap: idle_inject: Use higher resolution for idle injection The resolution of the idle injection is limited to 1ms. If there is a need for an injection of 1.2 ms, it is not possible. The idle injection API is not yet used, so it is safe to convert the existing API to the new time unit instead of adding more functions. Convert to microsecond in order to use a finer grain time unit when injecting idle cycles. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. 
Wysocki --- drivers/powercap/idle_inject.c | 53 +++++++++++++++++++++--------------------- include/linux/idle_inject.h | 8 +++---- 2 files changed, 31 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index 10601f4bdf72..cd1270614cc6 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -59,14 +59,14 @@ struct idle_inject_thread { /** * struct idle_inject_device - idle injection data * @timer: idle injection period timer - * @idle_duration_ms: duration of CPU idle time to inject - * @run_duration_ms: duration of CPU run time to allow + * @idle_duration_us: duration of CPU idle time to inject + * @run_duration_us: duration of CPU run time to allow * @cpumask: mask of CPUs affected by idle injection */ struct idle_inject_device { struct hrtimer timer; - unsigned int idle_duration_ms; - unsigned int run_duration_ms; + unsigned int idle_duration_us; + unsigned int run_duration_us; unsigned long int cpumask[0]; }; @@ -104,16 +104,16 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev) */ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) { - unsigned int duration_ms; + unsigned int duration_us; struct idle_inject_device *ii_dev = container_of(timer, struct idle_inject_device, timer); - duration_ms = READ_ONCE(ii_dev->run_duration_ms); - duration_ms += READ_ONCE(ii_dev->idle_duration_ms); + duration_us = READ_ONCE(ii_dev->run_duration_us); + duration_us += READ_ONCE(ii_dev->idle_duration_us); idle_inject_wakeup(ii_dev); - hrtimer_forward_now(timer, ms_to_ktime(duration_ms)); + hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); return HRTIMER_RESTART; } @@ -138,35 +138,35 @@ static void idle_inject_fn(unsigned int cpu) */ iit->should_run = 0; - play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC); + play_idle(READ_ONCE(ii_dev->idle_duration_us)); } /** * idle_inject_set_duration - idle and run duration update helper - * @run_duration_ms: CPU run time to allow in milliseconds - * @idle_duration_ms: CPU idle time to inject in milliseconds + * @run_duration_us: CPU run time to allow in microseconds + * @idle_duration_us: CPU idle time to inject in microseconds */ void idle_inject_set_duration(struct idle_inject_device *ii_dev, - unsigned int run_duration_ms, - unsigned int idle_duration_ms) + unsigned int run_duration_us, + unsigned int idle_duration_us) { - if (run_duration_ms && idle_duration_ms) { - WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms); - WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms); + if (run_duration_us && idle_duration_us) { + WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); + WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); } } /** * idle_inject_get_duration - idle and run duration retrieval helper - * @run_duration_ms: memory location to store the current CPU run time - * @idle_duration_ms: memory location to store the current CPU idle time + * @run_duration_us: memory location to store the current CPU run time + * @idle_duration_us: memory location to store the current CPU idle time */ void idle_inject_get_duration(struct idle_inject_device *ii_dev, - unsigned int *run_duration_ms, - unsigned int *idle_duration_ms) + unsigned int *run_duration_us, + unsigned int *idle_duration_us) { - *run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); - *idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); + *run_duration_us = READ_ONCE(ii_dev->run_duration_us); + *idle_duration_us = 
READ_ONCE(ii_dev->idle_duration_us); } /** @@ -181,10 +181,10 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, */ int idle_inject_start(struct idle_inject_device *ii_dev) { - unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms); - unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms); + unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); + unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); - if (!idle_duration_ms || !run_duration_ms) + if (!idle_duration_us || !run_duration_us) return -EINVAL; pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", @@ -193,7 +193,8 @@ int idle_inject_start(struct idle_inject_device *ii_dev) idle_inject_wakeup(ii_dev); hrtimer_start(&ii_dev->timer, - ms_to_ktime(idle_duration_ms + run_duration_ms), + ns_to_ktime((idle_duration_us + run_duration_us) * + NSEC_PER_USEC), HRTIMER_MODE_REL); return 0; diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index bdc0293fb6cb..a445cd1a36c5 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -20,10 +20,10 @@ int idle_inject_start(struct idle_inject_device *ii_dev); void idle_inject_stop(struct idle_inject_device *ii_dev); void idle_inject_set_duration(struct idle_inject_device *ii_dev, - unsigned int run_duration_ms, - unsigned int idle_duration_ms); + unsigned int run_duration_us, + unsigned int idle_duration_us); void idle_inject_get_duration(struct idle_inject_device *ii_dev, - unsigned int *run_duration_ms, - unsigned int *idle_duration_ms); + unsigned int *run_duration_us, + unsigned int *idle_duration_us); #endif /* __IDLE_INJECT_H__ */ -- cgit From cb5d8c45ab6c3daf8269e550cfb2d5018a876fe3 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Sun, 8 Sep 2019 00:45:21 +0100 Subject: cpuidle: allow governor switch on cpuidle_register_driver() The recently introduced haltpoll driver is largely only useful with haltpoll governor. To allow drivers to associate with a particular idle behaviour, add a @governor property to 'struct cpuidle_driver' and thus allow a cpuidle driver to switch to a *preferred* governor on idle driver registration. We save the previous governor, and when an idle driver is unregistered we switch back to that. The @governor can be overridden by cpuidle.governor= boot param or alternatively be ignored if the governor doesn't exist. Signed-off-by: Joao Martins Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/cpuidle.h | 2 ++ drivers/cpuidle/driver.c | 25 +++++++++++++++++++++++++ drivers/cpuidle/governor.c | 7 ++++--- include/linux/cpuidle.h | 3 +++ 4 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index d6613101af92..9f336af17fa6 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -9,6 +9,7 @@ /* For internal use only */ extern char param_governor[]; extern struct cpuidle_governor *cpuidle_curr_governor; +extern struct cpuidle_governor *cpuidle_prev_governor; extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; @@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void); extern void cpuidle_uninstall_idle_handler(void); /* governors */ +extern struct cpuidle_governor *cpuidle_find_governor(const char *str); extern int cpuidle_switch_governor(struct cpuidle_governor *gov); /* sysfs */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index dc32f34e68d9..80c1a830d991 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -254,12 +254,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) */ int cpuidle_register_driver(struct cpuidle_driver *drv) { + struct cpuidle_governor *gov; int ret; spin_lock(&cpuidle_driver_lock); ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); + if (!ret && !strlen(param_governor) && drv->governor && + (cpuidle_get_driver() == drv)) { + mutex_lock(&cpuidle_lock); + gov = cpuidle_find_governor(drv->governor); + if (gov) { + cpuidle_prev_governor = cpuidle_curr_governor; + if (cpuidle_switch_governor(gov) < 0) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); + } + return ret; } EXPORT_SYMBOL_GPL(cpuidle_register_driver); @@ -274,9 +287,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { + bool enabled = (cpuidle_get_driver() == drv); + spin_lock(&cpuidle_driver_lock); __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); + + if (!enabled) + return; + + mutex_lock(&cpuidle_lock); + if (cpuidle_prev_governor) { + if (!cpuidle_switch_governor(cpuidle_prev_governor)) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 2e3e14192bee..e9801f26c732 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN]; LIST_HEAD(cpuidle_governors); struct cpuidle_governor *cpuidle_curr_governor; +struct cpuidle_governor *cpuidle_prev_governor; /** - * __cpuidle_find_governor - finds a governor of the specified name + * cpuidle_find_governor - finds a governor of the specified name * @str: the name * * Must be called with cpuidle_lock acquired. 
*/ -static struct cpuidle_governor * __cpuidle_find_governor(const char *str) +struct cpuidle_governor *cpuidle_find_governor(const char *str) { struct cpuidle_governor *gov; @@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return -ENODEV; mutex_lock(&cpuidle_lock); - if (__cpuidle_find_governor(gov->name) == NULL) { + if (cpuidle_find_governor(gov->name) == NULL) { ret = 0; list_add_tail(&gov->governor_list, &cpuidle_governors); if (!cpuidle_curr_governor || diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 1a9f54eb3aa1..2dc4c6b19c25 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -121,6 +121,9 @@ struct cpuidle_driver { /* the driver handles the cpus in cpumask */ struct cpumask *cpumask; + + /* preferred governor to switch at register time */ + const char *governor; }; #ifdef CONFIG_CPU_IDLE -- cgit
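To show how a driver would opt into the new @governor field, here is a hypothetical sketch; the haltpoll driver's own switch to this field is not part of this excerpt, and the names used below (example_driver, "example_haltpoll") are illustrative only:

	#include <linux/module.h>
	#include <linux/cpuidle.h>

	/* Hypothetical driver declaration: ask for the "haltpoll" governor at
	 * registration time.  Per the patch above, cpuidle_register_driver()
	 * only honours this when no cpuidle.governor= boot parameter was given
	 * and the named governor exists; cpuidle_unregister_driver() restores
	 * the previously active governor. */
	static struct cpuidle_driver example_driver = {
		.name		= "example_haltpoll",
		.owner		= THIS_MODULE,
		/* .states[] and .state_count filled in as in cpuidle-haltpoll.c */
		.governor	= "haltpoll",
	};

Registering example_driver then behaves as before except that the cpuidle core attempts a governor switch on success and falls back silently if "haltpoll" is not available.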