Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r--	arch/x86/kvm/vmx/nested.c	212
1 file changed, 133 insertions, 79 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b213ca966d41..f235f77cbc03 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -245,7 +245,8 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
 	src = &prev->host_state;
 	dest = &vmx->loaded_vmcs->host_state;
 
-	vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
+	vmx_set_vmcs_host_state(dest, src->cr3, src->fs_sel, src->gs_sel,
+				src->fs_base, src->gs_base);
 	dest->ldt_sel = src->ldt_sel;
 #ifdef CONFIG_X86_64
 	dest->ds_sel = src->ds_sel;
@@ -269,7 +270,13 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 	vmx_sync_vmcs_host_state(vmx, prev);
 	put_cpu();
 
-	vmx_register_cache_reset(vcpu);
+	vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;
+
+	/*
+	 * All lazily updated registers will be reloaded from VMCS12 on both
+	 * vmentry and vmexit.
+	 */
+	vcpu->arch.regs_dirty = 0;
 }
 
 /*
@@ -391,9 +398,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 
 static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
 {
-	kvm_init_shadow_ept_mmu(vcpu,
-				to_vmx(vcpu)->nested.msrs.ept_caps &
-				VMX_EPT_EXECUTE_ONLY_BIT,
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
+	int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);
+
+	kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
 				nested_ept_ad_enabled(vcpu),
 				nested_ept_get_eptp(vcpu));
 }
@@ -591,6 +600,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	int msr;
 	unsigned long *msr_bitmap_l1;
 	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
 	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
 	/* Nothing to do if the MSR bitmap is not in use.  */
@@ -598,6 +608,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 	    !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
 		return false;
 
+	/*
+	 * MSR bitmap update can be skipped when:
+	 * - MSR bitmap for L1 hasn't changed.
+	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
+	 *   before.
+	 * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
+	 *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
+	 */
+	if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
+	    evmcs->hv_enlightenments_control.msr_bitmap &&
+	    evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
+		return true;
+
 	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
 		return false;
 
@@ -664,39 +687,47 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 
 	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
+	vmx->nested.force_msr_bitmap_recalc = false;
+
 	return true;
 }
 
 static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
-	struct kvm_host_map map;
-	struct vmcs12 *shadow;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
 	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 	    vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return;
 
-	shadow = get_shadow_vmcs12(vcpu);
-
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
 		return;
 
-	memcpy(shadow, map.hva, VMCS12_SIZE);
-	kvm_vcpu_unmap(vcpu, &map, false);
+	kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+			      VMCS12_SIZE);
 }
 
 static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
 					      struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
 	if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
 	    vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return;
 
-	kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
-			get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+				      vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+		return;
+
+	kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+			       VMCS12_SIZE);
 }
 
 /*
@@ -1089,7 +1120,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	 * must not be dereferenced.
 	 */
	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
-	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+	    CC(!load_pdptrs(vcpu, cr3))) {
 		*entry_failure_code = ENTRY_FAIL_PDPTE;
 		return -EINVAL;
 	}
@@ -1098,7 +1129,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		kvm_mmu_new_pgd(vcpu, cr3);
 
 	vcpu->arch.cr3 = cr3;
-	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+	kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
 
 	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 	kvm_init_mmu(vcpu);
@@ -1156,29 +1187,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 	WARN_ON(!enable_vpid);
 
 	/*
-	 * If VPID is enabled and used by vmc12, but L2 does not have a unique
-	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
-	 * a VPID for L2, flush the current context as the effective ASID is
-	 * common to both L1 and L2.
-	 *
-	 * Defer the flush so that it runs after vmcs02.EPTP has been set by
-	 * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
-	 * redundant flushes further down the nested pipeline.
-	 *
-	 * If a TLB flush isn't required due to any of the above, and vpid12 is
-	 * changing then the new "virtual" VPID (vpid12) will reuse the same
-	 * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
-	 * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
-	 * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
-	 * guest-physical mappings, so there is no need to sync the nEPT MMU.
+	 * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
+	 * emulate a guest TLB flush as KVM does not track vpid12 history nor
+	 * is the VPID incorporated into the MMU context.  I.e. KVM must assume
+	 * that the new vpid12 has never been used and thus represents a new
+	 * guest ASID that cannot have entries in the TLB.
 	 */
-	if (!nested_has_guest_tlb_tag(vcpu)) {
-		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-	} else if (is_vmenter &&
-		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+	if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
 		vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-		vpid_sync_context(nested_get_vpid02(vcpu));
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+		return;
 	}
+
+	/*
+	 * If VPID is enabled, used by vmc12, and vpid12 is not changing but
+	 * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
+	 * KVM was unable to allocate a VPID for L2, flush the current context
+	 * as the effective ASID is common to both L1 and L2.
+	 */
+	if (!nested_has_guest_tlb_tag(vcpu))
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 }
 
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@ -2018,10 +2046,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 	 * Clean fields data can't be used on VMLAUNCH and when we switch
 	 * between different L2 guests as KVM keeps a single VMCS12 per L1.
 	 */
-	if (from_launch || evmcs_gpa_changed)
+	if (from_launch || evmcs_gpa_changed) {
 		vmx->nested.hv_evmcs->hv_clean_fields &=
 			~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+		vmx->nested.force_msr_bitmap_recalc = true;
+	}
+
 	return EVMPTRLD_SUCCEEDED;
 }
 
@@ -2588,8 +2619,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
 	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
 	    WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
-				     vmcs12->guest_ia32_perf_global_ctrl)))
+				     vmcs12->guest_ia32_perf_global_ctrl))) {
+		*entry_failure_code = ENTRY_FAIL_DEFAULT;
 		return -EINVAL;
+	}
 
 	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
 	kvm_rip_write(vcpu, vmcs12->guest_rip);
@@ -2830,6 +2863,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+		!!(vcpu->arch.efer & EFER_LMA)))
+		return -EINVAL;
+#endif
+	return 0;
+}
+
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
@@ -2854,18 +2898,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 #ifdef CONFIG_X86_64
-	ia32e = !!(vcpu->arch.efer & EFER_LMA);
+	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 #else
 	ia32e = false;
 #endif
 
 	if (ia32e) {
-		if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
-		    CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
 			return -EINVAL;
 	} else {
-		if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
-		    CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+		if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
 		    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
 		    CC((vmcs12->host_rip) >> 32))
 			return -EINVAL;
@@ -2910,9 +2952,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 					  struct vmcs12 *vmcs12)
 {
-	int r = 0;
-	struct vmcs12 *shadow;
-	struct kvm_host_map map;
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+	struct vmcs_hdr hdr;
 
 	if (vmcs12->vmcs_link_pointer == INVALID_GPA)
 		return 0;
@@ -2920,17 +2962,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
 	if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
 		return -EINVAL;
 
-	if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
-		return -EINVAL;
+	if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+	    CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+					 vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+                return -EINVAL;
 
-	shadow = map.hva;
+	if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+					    offsetof(struct vmcs12, hdr),
+					    sizeof(hdr))))
+		return -EINVAL;
 
-	if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
-	    CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
-		r = -EINVAL;
+	if (CC(hdr.revision_id != VMCS12_REVISION) ||
+	    CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+		return -EINVAL;
 
-	kvm_vcpu_unmap(vcpu, &map, false);
-	return r;
+	return 0;
 }
 
 /*
@@ -3009,7 +3055,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 	bool vm_fail;
 
 	if (!nested_early_check)
@@ -3032,12 +3078,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	 */
 	vmcs_writel(GUEST_RFLAGS, 0);
 
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
@@ -3127,7 +3167,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 		 * the guest CR3 might be restored prior to setting the nested
 		 * state which can lead to a load of wrong PDPTRs.
 		 */
-		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
 			return false;
 	}
 
@@ -3325,8 +3365,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	};
 	u32 failed_index;
 
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	kvm_service_local_tlb_flush_requests(vcpu);
 
 	evaluate_pending_interrupts = exec_controls_get(vmx) &
 		(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@@ -3487,10 +3526,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (evmptrld_status == EVMPTRLD_ERROR) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
-	} else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
-		return nested_vmx_failInvalid(vcpu);
 	}
 
+	kvm_pmu_trigger_event(vcpu, PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
+
+	if (CC(evmptrld_status == EVMPTRLD_VMFAIL))
+		return nested_vmx_failInvalid(vcpu);
+
 	if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
 	       vmx->nested.current_vmptr == INVALID_GPA))
 		return nested_vmx_failInvalid(vcpu);
@@ -3535,6 +3577,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (nested_vmx_check_controls(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
+	if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
 	if (nested_vmx_check_host_state(vcpu, vmcs12))
 		return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
 
@@ -3583,7 +3628,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		    !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
 		      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
 			vmx->nested.nested_run_pending = 0;
-			return kvm_vcpu_halt(vcpu);
+			return kvm_emulate_halt_noskip(vcpu);
 		}
 		break;
 	case GUEST_ACTIVITY_WAIT_SIPI:
@@ -4480,9 +4525,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 		(void)nested_get_evmcs_page(vcpu);
 	}
 
-	/* Service the TLB flush request for L2 before switching to L1. */
-	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-		kvm_vcpu_flush_tlb_current(vcpu);
+	/* Service pending TLB flush requests for L2 before switching to L1. */
+	kvm_service_local_tlb_flush_requests(vcpu);
 
 	/*
 	 * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@@ -4835,6 +4879,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
 	if (!vmx->nested.cached_vmcs12)
 		goto out_cached_vmcs12;
 
+	vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
 	vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
 	if (!vmx->nested.cached_shadow_vmcs12)
 		goto out_cached_shadow_vmcs12;
@@ -5238,6 +5283,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
 		vmx->nested.need_vmcs12_to_shadow_sync = true;
 	}
 	vmx->nested.dirty_vmcs12 = true;
+	vmx->nested.force_msr_bitmap_recalc = true;
 }
 
 /* Emulate the VMPTRLD instruction */
@@ -5264,10 +5310,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		return 1;
 
 	if (vmx->nested.current_vmptr != vmptr) {
-		struct kvm_host_map map;
-		struct vmcs12 *new_vmcs12;
+		struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+		struct vmcs_hdr hdr;
 
-		if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
 			/*
 			 * Reads from an unbacked page return all 1s,
 			 * which means that the 32 bits located at the
@@ -5278,12 +5324,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 		}
 
-		new_vmcs12 = map.hva;
+		if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+						 offsetof(struct vmcs12, hdr),
+						 sizeof(hdr))) {
+			return nested_vmx_fail(vcpu,
+				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		}
 
-		if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
-		    (new_vmcs12->hdr.shadow_vmcs &&
+		if (hdr.revision_id != VMCS12_REVISION ||
+		    (hdr.shadow_vmcs &&
 		     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
-			kvm_vcpu_unmap(vcpu, &map, false);
 			return nested_vmx_fail(vcpu,
 				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
 		}
@@ -5294,8 +5344,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		 * Load VMCS12 from guest memory since it is not already
 		 * cached.
 		 */
-		memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
-		kvm_vcpu_unmap(vcpu, &map, false);
+		if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+					  VMCS12_SIZE)) {
+			return nested_vmx_fail(vcpu,
+				VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+		}
 
 		set_current_vmptr(vmx, vmptr);
 	}
@@ -6366,6 +6419,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		goto error_guest_mode;
 
 	vmx->nested.dirty_vmcs12 = true;
+	vmx->nested.force_msr_bitmap_recalc = true;
 	ret = nested_vmx_enter_non_root_mode(vcpu, false);
 	if (ret)
 		goto error_guest_mode;