From fcd1ec9cb59c4375803c2c3c18ba7f473fe91cdc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 27 Sep 2024 06:25:35 -0400 Subject: KVM: x86/mmu: fix KVM_X86_QUIRK_SLOT_ZAP_ALL for shadow MMU As was tried in commit 4e103134b862 ("KVM: x86/mmu: Zap only the relevant pages when removing a memslot"), all shadow pages, i.e. non-leaf SPTEs, need to be zapped. All of the accounting for a shadow page is tied to the memslot, i.e. the shadow page holds a reference to the memslot, for all intents and purposes. Deleting the memslot without removing all relevant shadow pages, as is done when KVM_X86_QUIRK_SLOT_ZAP_ALL is disabled, results in NULL pointer derefs when tearing down the VM. Reintroduce from that commit the code that walks the whole memslot when there are active shadow MMU pages. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 60 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e081f785fb23..912bad4fa88c 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1884,10 +1884,14 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp) if (is_obsolete_sp((_kvm), (_sp))) { \ } else -#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ +#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ for_each_valid_sp(_kvm, _sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ - if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else + if ((_sp)->gfn != (_gfn)) {} else + +#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \ + for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ + if (!sp_has_gptes(_sp)) {} else static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { @@ -7049,14 +7053,42 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) kvm_mmu_zap_all(kvm); } -/* - * Zapping leaf SPTEs with memslot range when a memslot is moved/deleted. - * - * Zapping non-leaf SPTEs, a.k.a. not-last SPTEs, isn't required, worst - * case scenario we'll have unused shadow pages lying around until they - * are recycled due to age or when the VM is destroyed. - */ -static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *slot) +static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm, + struct kvm_memory_slot *slot, + bool flush) +{ + LIST_HEAD(invalid_list); + unsigned long i; + + if (list_empty(&kvm->arch.active_mmu_pages)) + goto out_flush; + + /* + * Since accounting information is stored in struct kvm_arch_memory_slot, + * shadow pages deletion (e.g. unaccount_shadowed()) requires that all + * gfns with a shadow page have a corresponding memslot. Do so before + * the memslot goes away. + */ + for (i = 0; i < slot->npages; i++) { + struct kvm_mmu_page *sp; + gfn_t gfn = slot->base_gfn + i; + + for_each_gfn_valid_sp(kvm, sp, gfn) + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + + if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); + flush = false; + cond_resched_rwlock_write(&kvm->mmu_lock); + } + } + +out_flush: + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); +} + +static void kvm_mmu_zap_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) { struct kvm_gfn_range range = { .slot = slot, @@ -7064,11 +7096,11 @@ static void kvm_mmu_zap_memslot_leafs(struct kvm *kvm, struct kvm_memory_slot *s .end = slot->base_gfn + slot->npages, .may_block = true, }; + bool flush; write_lock(&kvm->mmu_lock); - if (kvm_unmap_gfn_range(kvm, &range)) - kvm_flush_remote_tlbs_memslot(kvm, slot); - + flush = kvm_unmap_gfn_range(kvm, &range); + kvm_mmu_zap_memslot_pages_and_flush(kvm, slot, flush); write_unlock(&kvm->mmu_lock); } @@ -7084,7 +7116,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, if (kvm_memslot_flush_zap_all(kvm)) kvm_mmu_zap_all_fast(kvm); else - kvm_mmu_zap_memslot_leafs(kvm, slot); + kvm_mmu_zap_memslot(kvm, slot); } void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) -- cgit From ea4290d77bda2bd1f173a86f07aa79b568e0a6f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Oct 2024 10:15:01 -0400 Subject: KVM: x86: leave kvm.ko out of the build if no vendor module is requested kvm.ko is nothing but library code shared by kvm-intel.ko and kvm-amd.ko. It provides no functionality on its own and it is unnecessary unless one of the vendor-specific module is compiled. In particular, /dev/kvm is not created until one of kvm-intel.ko or kvm-amd.ko is loaded. Use CONFIG_KVM to decide if it is built-in or a module, but use the vendor-specific modules for the actual decision on whether to build it. This also fixes a build failure when CONFIG_KVM_INTEL and CONFIG_KVM_AMD are both disabled. The cpu_emergency_register_virt_callback() function is called from kvm.ko, but it is only defined if at least one of CONFIG_KVM_INTEL and CONFIG_KVM_AMD is provided. Fixes: 590b09b1d88e ("KVM: x86: Register "emergency disable" callbacks when virt is enabled") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 9 ++++++--- arch/x86/kvm/Makefile | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 730c2f34d347..f09f13c01c6b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -17,8 +17,8 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM - tristate "Kernel-based Virtual Machine (KVM) support" +config KVM_X86 + def_tristate KVM if KVM_INTEL || KVM_AMD depends on X86_LOCAL_APIC select KVM_COMMON select KVM_GENERIC_MMU_NOTIFIER @@ -44,7 +44,11 @@ config KVM select HAVE_KVM_PM_NOTIFIER if PM select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_PRE_FAULT_MEMORY + select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM select KVM_WERROR if WERROR + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" help Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent @@ -77,7 +81,6 @@ config KVM_SW_PROTECTED_VM bool "Enable support for KVM software-protected VMs" depends on EXPERT depends on KVM && X86_64 - select KVM_GENERIC_PRIVATE_MEM help Enable support for KVM software-protected VMs. Currently, software- protected VMs are purely a development and testing vehicle for diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 5494669a055a..f9dddb8cb466 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -32,7 +32,7 @@ kvm-intel-y += vmx/vmx_onhyperv.o vmx/hyperv_evmcs.o kvm-amd-y += svm/svm_onhyperv.o endif -obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM_X86) += kvm.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o obj-$(CONFIG_KVM_AMD) += kvm-amd.o -- cgit From 2a5fe5a01668e831af1de3951718fbf88b9a9b9c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Oct 2024 10:34:58 -0400 Subject: x86/reboot: emergency callbacks are now registered by common KVM code Guard them with CONFIG_KVM_X86_COMMON rather than the two vendor modules. In practice this has no functional change, because CONFIG_KVM_X86_COMMON is set if and only if at least one vendor-specific module is being built. However, it is cleaner to specify CONFIG_KVM_X86_COMMON for functions that are used in kvm.ko. Reported-by: Linus Torvalds Fixes: 590b09b1d88e ("KVM: x86: Register "emergency disable" callbacks when virt is enabled") Fixes: 6d55a94222db ("x86/reboot: Unconditionally define cpu_emergency_virt_cb typedef") Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/reboot.h | 4 ++-- arch/x86/kernel/reboot.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index d0ef2a678d66..2d8525a51478 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -26,13 +26,13 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_APM 1 typedef void (cpu_emergency_virt_cb)(void); -#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) +#if IS_ENABLED(CONFIG_KVM_X86) void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback); void cpu_emergency_disable_virtualization(void); #else static inline void cpu_emergency_disable_virtualization(void) {} -#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ +#endif /* CONFIG_KVM_X86 */ typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 0e0a4cf6b5eb..615922838c51 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -530,7 +530,7 @@ static inline void kb_wait(void) static inline void nmi_shootdown_cpus_on_restart(void); -#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD) +#if IS_ENABLED(CONFIG_KVM_X86) /* RCU-protected callback to disable virtualization prior to reboot. */ static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback; @@ -600,7 +600,7 @@ static void emergency_reboot_disable_virtualization(void) } #else static void emergency_reboot_disable_virtualization(void) { } -#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */ +#endif /* CONFIG_KVM_X86 */ void __attribute__((weak)) mach_reboot_fixups(void) { -- cgit