From 9397fa2ea3e7634f61da1ab76b9eb88ba04dfdfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2023 12:21:07 +0200 Subject: clocksource: hyper-v: Adjust hv_read_tsc_page_tsc() to avoid special casing U64_MAX Currently hv_read_tsc_page_tsc() (ab)uses the (valid) time value of U64_MAX as an error return. This breaks the clean wrap-around of the clock. Modify the function signature to return a boolean state and provide another u64 pointer to store the actual time on success. This obviates the need to steal one time value and restores the full counter width. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Michael Kelley Tested-by: Michael Kelley # Hyper-V Link: https://lore.kernel.org/r/20230519102715.775630881@infradead.org --- drivers/clocksource/hyperv_timer.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/clocksource/hyperv_timer.c') diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index bcd9042a0c9f..c643bfe2f3d4 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -393,14 +393,20 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) } EXPORT_SYMBOL_GPL(hv_get_tsc_page); -static u64 notrace read_hv_clock_tsc(void) +static notrace u64 read_hv_clock_tsc(void) { - u64 current_tick = hv_read_tsc_page(hv_get_tsc_page()); + u64 cur_tsc, time; - if (current_tick == U64_MAX) - current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT); + /* + * The Hyper-V Top-Level Function Spec (TLFS), section Timers, + * subsection Refererence Counter, guarantees that the TSC and MSR + * times are in sync and monotonic. Therefore we can fall back + * to the MSR in case the TSC page indicates unavailability. + */ + if (!hv_read_tsc_page_tsc(tsc_page, &cur_tsc, &time)) + time = hv_get_register(HV_REGISTER_TIME_REF_COUNT); - return current_tick; + return time; } static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) -- cgit From e39acc37db34f6688e2c16e958fb1d662c422c81 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 May 2023 12:21:08 +0200 Subject: clocksource: hyper-v: Provide noinstr sched_clock() With the intent to provide local_clock_noinstr(), a variant of local_clock() that's safe to be called from noinstr code (with the assumption that any such code will already be non-preemptible), prepare for things by making the Hyper-V TSC and MSR sched_clock implementations noinstr. Signed-off-by: Peter Zijlstra (Intel) Co-developed-by: Michael Kelley Signed-off-by: Michael Kelley Signed-off-by: Peter Zijlstra (Intel) Tested-by: Michael Kelley # Hyper-V Link: https://lore.kernel.org/r/20230519102715.843039089@infradead.org --- arch/x86/include/asm/mshyperv.h | 5 +++++ drivers/clocksource/hyperv_timer.c | 32 ++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 14 deletions(-) (limited to 'drivers/clocksource/hyperv_timer.c') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 49bb4f2bd300..88d9ef98e087 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -257,6 +257,11 @@ void hv_set_register(unsigned int reg, u64 value); u64 hv_get_non_nested_register(unsigned int reg); void hv_set_non_nested_register(unsigned int reg, u64 value); +static __always_inline u64 hv_raw_get_register(unsigned int reg) +{ + return __rdmsr(reg); +} + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline void hyperv_setup_mmu_ops(void) {} diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index c643bfe2f3d4..d851970e310c 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -365,6 +365,20 @@ void hv_stimer_global_cleanup(void) } EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); +static __always_inline u64 read_hv_clock_msr(void) +{ + /* + * Read the partition counter to get the current tick count. This count + * is set to 0 when the partition is created and is incremented in 100 + * nanosecond units. + * + * Use hv_raw_get_register() because this function is used from + * noinstr. Notable; while HV_REGISTER_TIME_REF_COUNT is a synthetic + * register it doesn't need the GHCB path. + */ + return hv_raw_get_register(HV_REGISTER_TIME_REF_COUNT); +} + /* * Code and definitions for the Hyper-V clocksources. Two * clocksources are defined: one that reads the Hyper-V defined MSR, and @@ -393,7 +407,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void) } EXPORT_SYMBOL_GPL(hv_get_tsc_page); -static notrace u64 read_hv_clock_tsc(void) +static __always_inline u64 read_hv_clock_tsc(void) { u64 cur_tsc, time; @@ -404,7 +418,7 @@ static notrace u64 read_hv_clock_tsc(void) * to the MSR in case the TSC page indicates unavailability. */ if (!hv_read_tsc_page_tsc(tsc_page, &cur_tsc, &time)) - time = hv_get_register(HV_REGISTER_TIME_REF_COUNT); + time = read_hv_clock_msr(); return time; } @@ -414,7 +428,7 @@ static u64 notrace read_hv_clock_tsc_cs(struct clocksource *arg) return read_hv_clock_tsc(); } -static u64 notrace read_hv_sched_clock_tsc(void) +static u64 noinstr read_hv_sched_clock_tsc(void) { return (read_hv_clock_tsc() - hv_sched_clock_offset) * (NSEC_PER_SEC / HV_CLOCK_HZ); @@ -466,22 +480,12 @@ static struct clocksource hyperv_cs_tsc = { #endif }; -static u64 notrace read_hv_clock_msr(void) -{ - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - return hv_get_register(HV_REGISTER_TIME_REF_COUNT); -} - static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) { return read_hv_clock_msr(); } -static u64 notrace read_hv_sched_clock_msr(void) +static u64 noinstr read_hv_sched_clock_msr(void) { return (read_hv_clock_msr() - hv_sched_clock_offset) * (NSEC_PER_SEC / HV_CLOCK_HZ); -- cgit From e5313f1c540434b18ea57927633b1584c534b14a Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 19 Jun 2023 12:02:40 -0700 Subject: clocksource/drivers/hyper-v: Rework clocksource and sched clock setup Current code assigns either the Hyper-V TSC page or MSR-based ref counter as the sched clock. This may be sub-optimal in two cases. First, if there is hardware support to ensure consistent TSC frequency across live migrations and Hyper-V is using that support, the raw TSC is a faster source of time than the Hyper-V TSC page. Second, the MSR-based ref counter is relatively slow because reads require a trap to the hypervisor. As such, it should never be used as the sched clock. The native sched clock based on the raw TSC or jiffies is much better. Rework the sched clock setup so it is set to the TSC page only if Hyper-V indicates that the TSC may have inconsistent frequency across live migrations. Also, remove the code that sets the sched clock to the MSR-based ref counter. In the cases where it is not set, the sched clock will then be the native sched clock. As part of the rework, always enable both the TSC page clocksource and the MSR-based ref counter clocksource. Set the ratings so the TSC page clocksource is preferred. While the MSR-based ref counter clocksource is unlikely to ever be the default, having it available for manual selection is convenient for development purposes. Signed-off-by: Michael Kelley Reviewed-by: Dexuan Cui Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1687201360-16003-1-git-send-email-mikelley@microsoft.com --- drivers/clocksource/hyperv_timer.c | 54 ++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 31 deletions(-) (limited to 'drivers/clocksource/hyperv_timer.c') diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index bcd9042a0c9f..9fc008c16636 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -475,15 +475,9 @@ static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg) return read_hv_clock_msr(); } -static u64 notrace read_hv_sched_clock_msr(void) -{ - return (read_hv_clock_msr() - hv_sched_clock_offset) * - (NSEC_PER_SEC / HV_CLOCK_HZ); -} - static struct clocksource hyperv_cs_msr = { .name = "hyperv_clocksource_msr", - .rating = 500, + .rating = 495, .read = read_hv_clock_msr_cs, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, @@ -513,7 +507,7 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock) static __always_inline void hv_setup_sched_clock(void *sched_clock) {} #endif /* CONFIG_GENERIC_SCHED_CLOCK */ -static bool __init hv_init_tsc_clocksource(void) +static void __init hv_init_tsc_clocksource(void) { union hv_reference_tsc_msr tsc_msr; @@ -524,17 +518,14 @@ static bool __init hv_init_tsc_clocksource(void) * Hyper-V Reference TSC rating, causing the generic TSC to be used. * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference * TSC will be preferred over the virtualized ARM64 arch counter. - * While the Hyper-V MSR clocksource won't be used since the - * Reference TSC clocksource is present, change its rating as - * well for consistency. */ if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { hyperv_cs_tsc.rating = 250; - hyperv_cs_msr.rating = 250; + hyperv_cs_msr.rating = 245; } if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE)) - return false; + return; hv_read_reference_counter = read_hv_clock_tsc; @@ -565,33 +556,34 @@ static bool __init hv_init_tsc_clocksource(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - hv_sched_clock_offset = hv_read_reference_counter(); - hv_setup_sched_clock(read_hv_sched_clock_tsc); - - return true; + /* + * If TSC is invariant, then let it stay as the sched clock since it + * will be faster than reading the TSC page. But if not invariant, use + * the TSC page so that live migrations across hosts with different + * frequencies is handled correctly. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) { + hv_sched_clock_offset = hv_read_reference_counter(); + hv_setup_sched_clock(read_hv_sched_clock_tsc); + } } void __init hv_init_clocksource(void) { /* - * Try to set up the TSC page clocksource. If it succeeds, we're - * done. Otherwise, set up the MSR clocksource. At least one of - * these will always be available except on very old versions of - * Hyper-V on x86. In that case we won't have a Hyper-V + * Try to set up the TSC page clocksource, then the MSR clocksource. + * At least one of these will always be available except on very old + * versions of Hyper-V on x86. In that case we won't have a Hyper-V * clocksource, but Linux will still run with a clocksource based * on the emulated PIT or LAPIC timer. + * + * Never use the MSR clocksource as sched clock. It's too slow. + * Better to use the native sched clock as the fallback. */ - if (hv_init_tsc_clocksource()) - return; - - if (!(ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE)) - return; - - hv_read_reference_counter = read_hv_clock_msr; - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); + hv_init_tsc_clocksource(); - hv_sched_clock_offset = hv_read_reference_counter(); - hv_setup_sched_clock(read_hv_sched_clock_msr); + if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) + clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); } void __init hv_remap_tsc_clocksource(void) -- cgit