Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r--	arch/x86/kvm/vmx/nested.c	147
1 file changed, 100 insertions, 47 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 9c941535f78c..ba34e94049c7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,6 +7,7 @@
 #include <asm/mmu_context.h>
 
 #include "cpuid.h"
+#include "evmcs.h"
 #include "hyperv.h"
 #include "mmu.h"
 #include "nested.h"
@@ -245,7 +246,8 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
 	src = &prev->host_state;
 	dest = &vmx->loaded_vmcs->host_state;
 
-	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+	vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
+				src->fs_base, src->gs_base);
 	dest->ldt_sel = src->ldt_sel;
 #ifdef CONFIG_X86_64
 	dest->ds_sel = src->ds_sel;
@@ -269,7 +271,13 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 	vmx_sync_vmcs_host_state(vmx, prev);
 	put_cpu();
 
-	vmx_register_cache_reset(vcpu);
+	vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;
+
+	/*
+	 * All lazily updated registers will be reloaded from VMCS12 on both
+	 * vmentry and vmexit.
+	 */
+	vcpu->arch.regs_dirty = 0;
 }
 
 /*
@@ -391,9 +399,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 
 static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
 {
-	kvm_init_shadow_ept_mmu(vcpu,
-				to_vmx(vcpu)->nested.msrs.ept_caps &
-				VMX_EPT_EXECUTE_ONLY_BIT,
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
+	int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);
+
+	kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
 				nested_ept_ad_enabled(vcpu),
 				nested_ept_get_eptp(vcpu));
 }
@@ -591,6 +601,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	int msr;
 	unsigned long *msr_bitmap_l1;
 	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
 	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
 	/* Nothing to do if the MSR bitmap is not in use.  */
@@ -598,6 +609,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return false;
 
+	/*
+	 * MSR bitmap update can be skipped when:
+	 * - MSR bitmap for L1 hasn't changed.
+	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
+	 *   before.
+	 * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
+	 *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
+	 */
+	if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
+	    evmcs->hv_enlightenments_control.msr_bitmap &&
+	    evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
+		return true;
+
 	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
 		return false;
 
@@ -664,6 +688,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 
 	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
+	vmx->nested.force_msr_bitmap_recalc = false;
+
 	return true;
 }
 
@@ -1095,7 +1121,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	 * must not be dereferenced.
 	 */
 	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
-	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+	    CC(!load_pdptrs(vcpu, cr3))) {
 		*entry_failure_code = ENTRY_FAIL_PDPTE;
 		return -EINVAL;
 	}
@@ -1104,7 +1130,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		kvm_mmu_new_pgd(vcpu, cr3);
 
 	vcpu->arch.cr3 = cr3;
-	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+	kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
 
 	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 	kvm_init_mmu(vcpu);
@@ -2021,10 +2047,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 	 * Clean fields data can't be used on VMLAUNCH and when we switch
 	 * between different L2 guests as KVM keeps a single VMCS12 per L1.
 	 */
-	if (from_launch || evmcs_gpa_changed)
+	if (from_launch || evmcs_gpa_changed) {
 		vmx->nested.hv_evmcs->hv_clean_fields &=
 			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+		vmx->nested.force_msr_bitmap_recalc = true;
+	}
+
 	return EVMPTRLD_SUCCEEDED;
 }
 
@@ -3027,7 +3056,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 	bool vm_fail;
 
 	if (!nested_early_check)
@@ -3050,12 +3079,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	 */
 	vmcs_writel(GUEST_RFLAGS, 0);
 
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
@@ -3145,7 +3168,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 		 * the guest CR3 might be restored prior to setting the nested
 		 * state which can lead to a load of wrong PDPTRs.
 		 */
-		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
			return false;
 	}
 
@@ -3504,10 +3527,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (evmptrld_status == EVMPTRLD_ERROR) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
-	} else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
-		return nested_vmx_failInvalid(vcpu);
 	}
 
+	kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+	if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
+		return nested_vmx_failInvalid(vcpu);
+
 	if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
 	       vmx->nested.current_vmptr == INVALID_GPA))
 		return nested_vmx_failInvalid(vcpu);
@@ -3603,7 +3629,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		    !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
 		      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
 			vmx->nested.nested_run_pending = 0;
-			return kvm_vcpu_halt(vcpu);
+			return kvm_emulate_halt_noskip(vcpu);
 		}
 		break;
 	case GUEST_ACTIVITY_WAIT_SIPI:
@@ -4826,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
 	struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
 
 	/*
-	 * We should allocate a shadow vmcs for vmcs01 only when L1
-	 * executes VMXON and free it when L1 executes VMXOFF.
-	 * As it is invalid to execute VMXON twice, we shouldn't reach
-	 * here when vmcs01 already have an allocated shadow vmcs.
+	 * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it
+	 * when L1 executes VMXOFF or the vCPU is forced out of nested
+	 * operation.  VMXON faults if the CPU is already post-VMXON, so it
+	 * should be impossible to already have an allocated shadow VMCS.  KVM
+	 * doesn't support virtualization of VMCS shadowing, so vmcs01 should
+	 * always be the loaded VMCS.
 	 */
-	WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
+	if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs))
+		return loaded_vmcs->shadow_vmcs;
+
+	loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
+	if (loaded_vmcs->shadow_vmcs)
+		vmcs_clear(loaded_vmcs->shadow_vmcs);
 
-	if (!loaded_vmcs->shadow_vmcs) {
-		loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
-		if (loaded_vmcs->shadow_vmcs)
-			vmcs_clear(loaded_vmcs->shadow_vmcs);
-	}
 	return loaded_vmcs->shadow_vmcs;
 }
 
@@ -5074,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	/*
-	 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
-	 * any VMREAD sets the ALU flags for VMfailInvalid.
-	 */
-	if (vmx->nested.current_vmptr == INVALID_GPA ||
-	    (is_guest_mode(vcpu) &&
-	     get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
-		return nested_vmx_failInvalid(vcpu);
-
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
 
-	offset = vmcs_field_to_offset(field);
-	if (offset < 0)
-		return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+	if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+		/*
+		 * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA,
+		 * any VMREAD sets the ALU flags for VMfailInvalid.
+		 */
+		if (vmx->nested.current_vmptr == INVALID_GPA ||
+		    (is_guest_mode(vcpu) &&
+		     get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA))
+			return nested_vmx_failInvalid(vcpu);
 
-	if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
-		copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+		offset = get_vmcs12_field_offset(field);
+		if (offset < 0)
+			return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+		if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
+			copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
+
+		/* Read the field, zero-extended to a u64 value */
+		value = vmcs12_read_any(vmcs12, field, offset);
+	} else {
+		/*
+		 * Hyper-V TLFS (as of 6.0b) explicitly states, that while an
+		 * enlightened VMCS is active VMREAD/VMWRITE instructions are
+		 * unsupported. Unfortunately, certain versions of Windows 11
+		 * don't comply with this requirement which is not enforced in
+		 * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a
+		 * workaround, as misbehaving guests will panic on VM-Fail.
+		 * Note, enlightened VMCS is incompatible with shadow VMCS so
+		 * all VMREADs from L2 should go to L1.
+		 */
+		if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+			return nested_vmx_failInvalid(vcpu);
 
-	/* Read the field, zero-extended to a u64 value */
-	value = vmcs12_read_any(vmcs12, field, offset);
+		offset = evmcs_field_offset(field, NULL);
+		if (offset < 0)
+			return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+
+		/* Read the field, zero-extended to a u64 value */
+		value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset);
+	}
 
 	/*
 	 * Now copy part of this value to register or memory, as requested.
@@ -5189,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
 
 	field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
 
-	offset = vmcs_field_to_offset(field);
+	offset = get_vmcs12_field_offset(field);
 	if (offset < 0)
 		return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 
@@ -5258,6 +5308,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
 		vmx->nested.need_vmcs12_to_shadow_sync = true;
 	}
 	vmx->nested.dirty_vmcs12 = true;
+	vmx->nested.force_msr_bitmap_recalc = true;
 }
 
 /* Emulate the VMPTRLD instruction */
@@ -6393,6 +6444,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		goto error_guest_mode;
 
 	vmx->nested.dirty_vmcs12 = true;
+	vmx->nested.force_msr_bitmap_recalc = true;
 	ret = nested_vmx_enter_non_root_mode(vcpu, false);
 	if (ret)
 		goto error_guest_mode;
@@ -6435,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void)
 	max_idx = 0;
 	for (i = 0; i < nr_vmcs12_fields; i++) {
 		/* The vmcs12 table is very, very sparsely populated. */
-		if (!vmcs_field_to_offset_table[i])
+		if (!vmcs12_field_offsets[i])
 			continue;
 
 		idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
@@ -6744,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
 }
 
 struct kvm_x86_nested_ops vmx_nested_ops = {
+	.leave_nested = vmx_leave_nested,
 	.check_events = vmx_check_nested_events,
 	.hv_timer_pending = nested_vmx_preemption_timer_pending,
 	.triple_fault = nested_vmx_triple_fault,