Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
 -rw-r--r--  arch/x86/kvm/vmx/vmx.c | 191
1 file changed, 110 insertions, 81 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1111d9d08903..c37a89eda90f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -38,6 +38,7 @@
 #include <asm/desc.h>
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
+#include <asm/fred.h>
 #include <asm/idtentry.h>
 #include <asm/io.h>
 #include <asm/irq_remapping.h>
@@ -49,6 +50,8 @@
 #include <asm/spec-ctrl.h>
 #include <asm/vmx.h>

+#include <trace/events/ipi.h>
+
 #include "capabilities.h"
 #include "cpuid.h"
 #include "hyperv.h"
@@ -159,7 +162,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);

 /*
  * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic and PT MSRs are handled specially.
+ * In addition to these x2apic, PT and LBR MSRs are handled specially.
  */
 static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
 	MSR_IA32_SPEC_CTRL,
@@ -388,7 +391,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)

 static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
 {
-	vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+	/*
+	 * Disable VERW's behavior of clearing CPU buffers for the guest if the
+	 * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+	 * the mitigation. Disabling the clearing behavior provides a
+	 * performance boost for guests that aren't aware that manually clearing
+	 * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+	 * and VM-Exit.
+	 */
+	vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+				(host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
 				!boot_cpu_has_bug(X86_BUG_MDS) &&
 				!boot_cpu_has_bug(X86_BUG_TAA);

@@ -658,25 +670,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }

-static int possible_passthrough_msr_slot(u32 msr)
-{
-	u32 i;
-
-	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
-		if (vmx_possible_passthrough_msrs[i] == msr)
-			return i;
-
-	return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
 {
-	bool r;
+	int i;

 	switch (msr) {
 	case 0x800 ... 0x8ff:
 		/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
-		return true;
+		return -ENOENT;
 	case MSR_IA32_RTIT_STATUS:
 	case MSR_IA32_RTIT_OUTPUT_BASE:
 	case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -691,14 +692,16 @@ static bool is_valid_passthrough_msr(u32 msr)
 	case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
 	case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
 		/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
-		return true;
+		return -ENOENT;
 	}

-	r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
-	WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+		if (vmx_possible_passthrough_msrs[i] == msr)
+			return i;
+	}

-	return r;
+	WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+	return -ENOENT;
 }

 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
@@ -1281,8 +1284,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	u16 fs_sel, gs_sel;
 	int i;

-	vmx->req_immediate_exit = false;
-
 	/*
 	 * Note that guest MSRs to be saved/restored can also be changed
 	 * when guest state is loaded. This happens when guest transitions
@@ -3954,6 +3955,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;

 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -3963,16 +3965,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	/*
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filters change.
-	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				clear_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				clear_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	 */
+	idx = vmx_get_passthrough_msr_slot(msr);
+	if (idx >= 0) {
+		if (type & MSR_TYPE_R)
+			clear_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			clear_bit(idx, vmx->shadow_msr_intercept.write);
 	}

 	if ((type & MSR_TYPE_R) &&
@@ -3998,6 +3997,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+	int idx;

 	if (!cpu_has_vmx_msr_bitmap())
 		return;
@@ -4007,16 +4007,13 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 	/*
 	 * Mark the desired intercept state in shadow bitmap, this is needed
 	 * for resync when the MSR filter changes.
-	*/
-	if (is_valid_passthrough_msr(msr)) {
-		int idx = possible_passthrough_msr_slot(msr);
-
-		if (idx != -ENOENT) {
-			if (type & MSR_TYPE_R)
-				set_bit(idx, vmx->shadow_msr_intercept.read);
-			if (type & MSR_TYPE_W)
-				set_bit(idx, vmx->shadow_msr_intercept.write);
-		}
+	 */
+	idx = vmx_get_passthrough_msr_slot(msr);
+	if (idx >= 0) {
+		if (type & MSR_TYPE_R)
+			set_bit(idx, vmx->shadow_msr_intercept.read);
+		if (type & MSR_TYPE_W)
+			set_bit(idx, vmx->shadow_msr_intercept.write);
 	}

 	if (type & MSR_TYPE_R)
@@ -4127,6 +4124,9 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 i;

+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
 	/*
 	 * Redo intercept permissions for MSRs that KVM is passing through to
 	 * the guest.  Disabling interception will check the new MSR filter and
@@ -5566,10 +5566,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)

 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
-		unsigned long val;
-
-		kvm_get_dr(vcpu, dr, &val);
-		kvm_register_write(vcpu, reg, val);
+		kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
 		err = 0;
 	} else {
 		err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
@@ -5991,22 +5988,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	return 1;
 }

-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+						   bool force_immediate_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);

-	if (!vmx->req_immediate_exit &&
-	    !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
-		kvm_lapic_expired_hv_timer(vcpu);
+	/*
+	 * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+	 * due to the timer expiring while it was "soft" disabled, just eat the
+	 * exit and re-enter the guest.
+	 */
+	if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
 		return EXIT_FASTPATH_REENTER_GUEST;
-	}

-	return EXIT_FASTPATH_NONE;
+	/*
+	 * If the timer expired because KVM used it to force an immediate exit,
+	 * then mission accomplished.
+	 */
+	if (force_immediate_exit)
+		return EXIT_FASTPATH_EXIT_HANDLED;
+
+	/*
+	 * If L2 is active, go down the slow path as emulating the guest timer
+	 * expiration likely requires synthesizing a nested VM-Exit.
+	 */
+	if (is_guest_mode(vcpu))
+		return EXIT_FASTPATH_NONE;
+
+	kvm_lapic_expired_hv_timer(vcpu);
+	return EXIT_FASTPATH_REENTER_GUEST;
 }

 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
-	handle_fastpath_preemption_timer(vcpu);
+	/*
+	 * This non-fastpath handler is reached if and only if the preemption
+	 * timer was being used to emulate a guest timer while L2 is active.
+	 * All other scenarios are supposed to be handled in the fastpath.
+	 */
+	WARN_ON_ONCE(!is_guest_mode(vcpu));
+	kvm_lapic_expired_hv_timer(vcpu);
 	return 1;
 }

@@ -6509,7 +6530,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
 		vcpu->run->internal.data[0] = vectoring_info;
 		vcpu->run->internal.data[1] = exit_reason.full;
-		vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+		vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
 		if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
 			vcpu->run->internal.data[ndata++] =
 				vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -6543,7 +6564,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)

 	if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
 		goto unexpected_vmexit;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
 	if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
 		return kvm_emulate_wrmsr(vcpu);
 	else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
@@ -6960,14 +6981,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 intr_info = vmx_get_intr_info(vcpu);
 	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-	gate_desc *desc = (gate_desc *)host_idt_base + vector;

 	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;

 	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-	vmx_do_interrupt_irqoff(gate_offset(desc));
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
+	else
+		vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
 	kvm_after_interrupt(vcpu);

 	vcpu->arch.at_instruction_boundary = true;
@@ -7146,13 +7169,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host, false);
 }

-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 tscl;
 	u32 delta_tsc;

-	if (vmx->req_immediate_exit) {
+	if (force_immediate_exit) {
 		vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
 		vmx->loaded_vmcs->hv_timer_soft_disabled = false;
 	} else if (vmx->hv_deadline_tsc != -1) {
@@ -7205,13 +7228,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
 	barrier_nospec();
 }

-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+					     bool force_immediate_exit)
 {
+	/*
+	 * If L2 is active, some VMX preemption timer exits can be handled in
+	 * the fastpath even, all other exits must use the slow path.
+	 */
+	if (is_guest_mode(vcpu) &&
+	    to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+		return EXIT_FASTPATH_NONE;
+
 	switch (to_vmx(vcpu)->exit_reason.basic) {
 	case EXIT_REASON_MSR_WRITE:
 		return handle_fastpath_set_msr_irqoff(vcpu);
 	case EXIT_REASON_PREEMPTION_TIMER:
-		return handle_fastpath_preemption_timer(vcpu);
+		return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
 	default:
 		return EXIT_FASTPATH_NONE;
 	}
@@ -7224,11 +7256,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,

 	guest_state_enter_irqoff();

-	/* L1D Flush includes CPU buffer clear to mitigate MDS */
+	/*
+	 * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+	 * mitigation for MDS is done late in VMentry and is still
+	 * executed in spite of L1D Flush. This is because an extra VERW
+	 * should not matter much after the big hammer L1D Flush.
+	 */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
-	else if (static_branch_unlikely(&mds_user_clear))
-		mds_clear_cpu_buffers();
 	else if (static_branch_unlikely(&mmio_stale_data_clear) &&
 		 kvm_arch_has_assigned_device(vcpu->kvm))
 		mds_clear_cpu_buffers();
@@ -7260,7 +7295,10 @@
 	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
 	    is_nmi(vmx_get_intr_info(vcpu))) {
 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-		vmx_do_nmi_irqoff();
+		if (cpu_feature_enabled(X86_FEATURE_FRED))
+			fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
+		else
+			vmx_do_nmi_irqoff();
 		kvm_after_interrupt(vcpu);
 	}

@@ -7268,7 +7306,7 @@ out:
 	guest_state_exit_irqoff();
 }

-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long cr3, cr4;
@@ -7295,7 +7333,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		return EXIT_FASTPATH_NONE;
 	}

-	trace_kvm_entry(vcpu);
+	trace_kvm_entry(vcpu, force_immediate_exit);

 	if (vmx->ple_window_dirty) {
 		vmx->ple_window_dirty = false;
@@ -7354,7 +7392,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx_passthrough_lbr_msrs(vcpu);

 	if (enable_preemption_timer)
-		vmx_update_hv_timer(vcpu);
+		vmx_update_hv_timer(vcpu, force_immediate_exit);
+	else if (force_immediate_exit)
+		smp_send_reschedule(vcpu->cpu);

 	kvm_wait_lapic_expire(vcpu);

@@ -7418,10 +7458,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_recover_nmi_blocking(vmx);
 	vmx_complete_interrupts(vmx);

-	if (is_guest_mode(vcpu))
-		return EXIT_FASTPATH_NONE;
-
-	return vmx_exit_handlers_fastpath(vcpu);
+	return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
 }

 static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7901,11 +7938,6 @@ static __init void vmx_set_cpu_caps(void)
 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
 }

-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
-	to_vmx(vcpu)->req_immediate_exit = true;
-}
-
 static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
 				  struct x86_instruction_info *info)
 {
@@ -8358,8 +8390,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.check_intercept = vmx_check_intercept,
 	.handle_exit_irqoff = vmx_handle_exit_irqoff,

-	.request_immediate_exit = vmx_request_immediate_exit,
-
 	.sched_in = vmx_sched_in,

 	.cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8619,7 +8649,6 @@ static __init int hardware_setup(void)
 	if (!enable_preemption_timer) {
 		vmx_x86_ops.set_hv_timer = NULL;
 		vmx_x86_ops.cancel_hv_timer = NULL;
-		vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
 	}

 	kvm_caps.supported_mce_cap |= MCG_LMCE_P;