diff options
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 107 |
1 files changed, 67 insertions, 40 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a8e7bc04d9bf..aa78b6f38dfe 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include <asm/debugreg.h> #include <asm/mmu_context.h> +#include "x86.h" #include "cpuid.h" #include "hyperv.h" #include "mmu.h" @@ -16,7 +17,6 @@ #include "sgx.h" #include "trace.h" #include "vmx.h" -#include "x86.h" #include "smm.h" static bool __read_mostly enable_shadow_vmcs = 1; @@ -231,11 +231,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - if (nested_vmx_is_evmptr12_valid(vmx)) { - kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); - vmx->nested.hv_evmcs = NULL; - } - + kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map); + vmx->nested.hv_evmcs = NULL; vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; if (hv_vcpu) { @@ -317,6 +314,16 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vcpu->arch.regs_dirty = 0; } +static void nested_put_vmcs12_pages(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map); + kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map); + kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map); + vmx->nested.pi_desc = NULL; +} + /* * Free whatever needs to be freed from vmx->nested when L1 goes down, or * just stops using VMX. @@ -349,15 +356,8 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.cached_vmcs12 = NULL; kfree(vmx->nested.cached_shadow_vmcs12); vmx->nested.cached_shadow_vmcs12 = NULL; - /* - * Unpin physical memory we referred to in the vmcs02. The APIC access - * page's backing page (yeah, confusing) shouldn't actually be accessed, - * and if it is written, the contents are irrelevant. - */ - kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false); - kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); - vmx->nested.pi_desc = NULL; + + nested_put_vmcs12_pages(vcpu); kvm_mmu_free_roots(vcpu->kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); @@ -624,7 +624,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, int msr; unsigned long *msr_bitmap_l1; unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; - struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; + struct kvm_host_map map; /* Nothing to do if the MSR bitmap is not in use. */ if (!cpu_has_vmx_msr_bitmap() || @@ -647,10 +647,10 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, return true; } - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map)) + if (kvm_vcpu_map_readonly(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), &map)) return false; - msr_bitmap_l1 = (unsigned long *)map->hva; + msr_bitmap_l1 = (unsigned long *)map.hva; /* * To keep the control flow simple, pay eight 8-byte writes (sixteen @@ -714,7 +714,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, MSR_IA32_FLUSH_CMD, MSR_TYPE_W); - kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); + kvm_vcpu_unmap(vcpu, &map); vmx->nested.force_msr_bitmap_recalc = false; @@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept); /* - * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings - * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a - * full TLB flush from the guest's perspective. This is required even - * if VPID is disabled in the host as KVM may need to synchronize the - * MMU in response to the guest TLB flush. + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host, and so architecturally, linear and combined + * mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM + * emulates L2 sharing L1's VPID=0 by using vpid01 while running L2, + * and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This + * is required if VPID is disabled in KVM, as a TLB flush (there are no + * VPIDs) still occurs from L1's perspective, and KVM may need to + * synchronize the MMU in response to the guest TLB flush. * * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use. * EPT is a special snowflake, as guest-physical mappings aren't @@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); + /* + * If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the + * same VPID as the host. Emulate this behavior by using vpid01 for L2 + * if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter + * and VM-Exit are architecturally required to flush VPID=0, but *only* + * VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the + * required flushes), but doing so would cause KVM to over-flush. E.g. + * if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled, + * and then runs L2 X again, then KVM can and should retain TLB entries + * for VPID12=1. + */ if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); @@ -2996,6 +3010,17 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, return 0; } +static bool is_l1_noncanonical_address_on_vmexit(u64 la, struct vmcs12 *vmcs12) +{ + /* + * Check that the given linear address is canonical after a VM exit + * from L2, based on HOST_CR4.LA57 value that will be loaded for L1. + */ + u8 l1_address_bits_on_exit = (vmcs12->host_cr4 & X86_CR4_LA57) ? 57 : 48; + + return !__is_canonical_address(la, l1_address_bits_on_exit); +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -3006,8 +3031,8 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(!kvm_vcpu_is_legal_cr3(vcpu, vmcs12->host_cr3))) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) + if (CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || + CC(is_noncanonical_msr_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && @@ -3041,12 +3066,12 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; - if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || - CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) + if (CC(is_noncanonical_base_address(vmcs12->host_fs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gs_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_gdtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_idtr_base, vcpu)) || + CC(is_noncanonical_base_address(vmcs12->host_tr_base, vcpu)) || + CC(is_l1_noncanonical_address_on_vmexit(vmcs12->host_rip, vmcs12))) return -EINVAL; /* @@ -3164,7 +3189,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || + (CC(is_noncanonical_msr_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; @@ -5013,11 +5038,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx_update_cpu_dirty_logging(vcpu); } - /* Unpin physical memory we referred to in vmcs02 */ - kvm_vcpu_unmap(vcpu, &vmx->nested.apic_access_page_map, false); - kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true); - kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); - vmx->nested.pi_desc = NULL; + nested_put_vmcs12_pages(vcpu); if (vmx->nested.reload_vmcs01_apic_access_page) { vmx->nested.reload_vmcs01_apic_access_page = false; @@ -5153,7 +5174,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, * non-canonical form. This is the only check on the memory * destination for long mode! */ - exn = is_noncanonical_address(*ret, vcpu); + exn = is_noncanonical_address(*ret, vcpu, 0); } else { /* * When not in long mode, the virtual/linear address is @@ -5950,6 +5971,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + /* + * Always flush the effective vpid02, i.e. never flush the current VPID + * and never explicitly flush vpid01. INVVPID targets a VPID, not a + * VMCS, and so whether or not the current vmcs12 has VPID enabled is + * irrelevant (and there may not be a loaded vmcs12). + */ vpid02 = nested_get_vpid02(vcpu); switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: @@ -5958,7 +5985,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) * invalidation. */ if (!operand.vpid || - is_noncanonical_address(operand.gla, vcpu)) + is_noncanonical_invlpg_address(operand.gla, vcpu)) return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); vpid_sync_vcpu_addr(vpid02, operand.gla); |