diff options
Diffstat (limited to 'arch/x86/kvm/vmx/nested.c')
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 229 |
1 file changed, 98 insertions, 131 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index eedcebf58004..b213ca966d41 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -191,7 +191,7 @@ static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error) * failValid writes the error number to the current VMCS, which * can't be done if there isn't a current VMCS. */ - if (vmx->nested.current_vmptr == -1ull && + if (vmx->nested.current_vmptr == INVALID_GPA && !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) return nested_vmx_failInvalid(vcpu); @@ -218,7 +218,7 @@ static inline u64 vmx_control_msr(u32 low, u32 high) static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); - vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); vmx->nested.need_vmcs12_to_shadow_sync = false; } @@ -290,9 +290,10 @@ static void free_nested(struct kvm_vcpu *vcpu) vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; + vmx->nested.vmxon_ptr = INVALID_GPA; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; - vmx->nested.current_vmptr = -1ull; + vmx->nested.current_vmptr = INVALID_GPA; if (enable_shadow_vmcs) { vmx_disable_shadow_vmcs(vmx); vmcs_clear(vmx->vmcs01.shadow_vmcs); @@ -524,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, } /* - * Check if MSR is intercepted for L01 MSR bitmap. + * For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1 + * itself utilizing x2APIC. All MSRs were previously set to be intercepted, + * only the "disable intercept" case needs to be handled. 
*/ -static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr) +static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1, + unsigned long *msr_bitmap_l0, + u32 msr, int type) { - unsigned long *msr_bitmap; - int f = sizeof(unsigned long); + if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr)) + vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr); - if (!cpu_has_vmx_msr_bitmap()) - return true; - - msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap; - - if (msr <= 0x1fff) { - return !!test_bit(msr, msr_bitmap + 0x800 / f); - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - return !!test_bit(msr, msr_bitmap + 0xc00 / f); - } - - return true; -} - -/* - * If a msr is allowed by L0, we should check whether it is allowed by L1. - * The corresponding bit will be cleared unless both of L0 and L1 allow it. - */ -static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1, - unsigned long *msr_bitmap_nested, - u32 msr, int type) -{ - int f = sizeof(unsigned long); - - /* - * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals - * have the write-low and read-high bitmap offsets the wrong way round. - * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
- */ - if (msr <= 0x1fff) { - if (type & MSR_TYPE_R && - !test_bit(msr, msr_bitmap_l1 + 0x000 / f)) - /* read-low */ - __clear_bit(msr, msr_bitmap_nested + 0x000 / f); - - if (type & MSR_TYPE_W && - !test_bit(msr, msr_bitmap_l1 + 0x800 / f)) - /* write-low */ - __clear_bit(msr, msr_bitmap_nested + 0x800 / f); - - } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { - msr &= 0x1fff; - if (type & MSR_TYPE_R && - !test_bit(msr, msr_bitmap_l1 + 0x400 / f)) - /* read-high */ - __clear_bit(msr, msr_bitmap_nested + 0x400 / f); - - if (type & MSR_TYPE_W && - !test_bit(msr, msr_bitmap_l1 + 0xc00 / f)) - /* write-high */ - __clear_bit(msr, msr_bitmap_nested + 0xc00 / f); - - } + if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr)) + vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr); } static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) @@ -599,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) } } +#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \ +static inline \ +void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \ + unsigned long *msr_bitmap_l1, \ + unsigned long *msr_bitmap_l0, u32 msr) \ +{ \ + if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \ + vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \ + vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \ + else \ + vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \ +} +BUILD_NVMX_MSR_INTERCEPT_HELPER(read) +BUILD_NVMX_MSR_INTERCEPT_HELPER(write) + +static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx, + unsigned long *msr_bitmap_l1, + unsigned long *msr_bitmap_l0, + u32 msr, int types) +{ + if (types & MSR_TYPE_R) + nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1, + msr_bitmap_l0, msr); + if (types & MSR_TYPE_W) + nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1, + msr_bitmap_l0, msr); +} + /* * Merge L0's and L1's MSR bitmap, return false to indicate that * we do not use the hardware. 
@@ -606,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap) static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int msr; unsigned long *msr_bitmap_l1; - unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap; - struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map; + unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap; + struct kvm_host_map *map = &vmx->nested.msr_bitmap_map; /* Nothing to do if the MSR bitmap is not in use. */ if (!cpu_has_vmx_msr_bitmap() || @@ -624,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, /* * To keep the control flow simple, pay eight 8-byte writes (sixteen * 4-byte writes on 32-bit systems) up front to enable intercepts for - * the x2APIC MSR range and selectively disable them below. + * the x2APIC MSR range and selectively toggle those relevant to L2. */ enable_x2apic_msr_intercepts(msr_bitmap_l0); @@ -643,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, } } - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_R | MSR_TYPE_W); if (nested_cpu_has_vid(vmcs12)) { - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_EOI), MSR_TYPE_W); - nested_vmx_disable_intercept_for_msr( + nested_vmx_disable_intercept_for_x2apic_msr( msr_bitmap_l1, msr_bitmap_l0, X2APIC_MSR(APIC_SELF_IPI), MSR_TYPE_W); } } - /* KVM unconditionally exposes the FS/GS base MSRs to L1. */ + /* + * Always check vmcs01's bitmap to honor userspace MSR filters and any + * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through. 
+ */ #ifdef CONFIG_X86_64 - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_FS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_FS_BASE, MSR_TYPE_RW); - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_GS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_GS_BASE, MSR_TYPE_RW); - nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0, - MSR_KERNEL_GS_BASE, MSR_TYPE_RW); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_KERNEL_GS_BASE, MSR_TYPE_RW); #endif + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_SPEC_CTRL, MSR_TYPE_RW); - /* - * Checking the L0->L1 bitmap is trying to verify two things: - * - * 1. L0 gave a permission to L1 to actually passthrough the MSR. This - * ensures that we do not accidentally generate an L02 MSR bitmap - * from the L12 MSR bitmap that is too permissive. - * 2. That L1 or L2s have actually used the MSR. This avoids - * unnecessarily merging of the bitmap if the MSR is unused. This - * works properly because we only update the L01 MSR bitmap lazily. - * So even if L0 should pass L1 these MSRs, the L01 bitmap is only - * updated to reflect this when L1 (or its L2s) actually write to - * the MSR. 
- */ - if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL)) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - MSR_IA32_SPEC_CTRL, - MSR_TYPE_R | MSR_TYPE_W); - - if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD)) - nested_vmx_disable_intercept_for_msr( - msr_bitmap_l1, msr_bitmap_l0, - MSR_IA32_PRED_CMD, - MSR_TYPE_W); + nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0, + MSR_IA32_PRED_CMD, MSR_TYPE_W); - kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false); + kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false); return true; } @@ -709,7 +674,7 @@ static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *shadow; if (!nested_cpu_has_shadow_vmcs(vmcs12) || - vmcs12->vmcs_link_pointer == -1ull) + vmcs12->vmcs_link_pointer == INVALID_GPA) return; shadow = get_shadow_vmcs12(vcpu); @@ -727,7 +692,7 @@ static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); if (!nested_cpu_has_shadow_vmcs(vmcs12) || - vmcs12->vmcs_link_pointer == -1ull) + vmcs12->vmcs_link_pointer == INVALID_GPA) return; kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer, @@ -1994,7 +1959,7 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( } if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { - vmx->nested.current_vmptr = -1ull; + vmx->nested.current_vmptr = INVALID_GPA; nested_release_evmcs(vcpu); @@ -2178,7 +2143,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) } if (cpu_has_vmx_encls_vmexit()) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + vmcs_write64(ENCLS_EXITING_BITMAP, INVALID_GPA); /* * Set the MSR load/store lists to match L0's settings. 
Only the @@ -2197,7 +2162,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, { prepare_vmcs02_constant_state(vmx); - vmcs_write64(VMCS_LINK_POINTER, -1ull); + vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA); if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -2949,7 +2914,7 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, struct vmcs12 *shadow; struct kvm_host_map map; - if (vmcs12->vmcs_link_pointer == -1ull) + if (vmcs12->vmcs_link_pointer == INVALID_GPA) return 0; if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) @@ -3216,7 +3181,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to * force VM-Entry to fail. */ - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, INVALID_GPA); } } @@ -3527,7 +3492,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) && - vmx->nested.current_vmptr == -1ull)) + vmx->nested.current_vmptr == INVALID_GPA)) return nested_vmx_failInvalid(vcpu); vmcs12 = get_vmcs12(vcpu); @@ -4975,7 +4940,7 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vmx->nested.current_vmptr == -1ull) + if (vmx->nested.current_vmptr == INVALID_GPA) return; copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); @@ -4995,7 +4960,7 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); - vmx->nested.current_vmptr = -1ull; + vmx->nested.current_vmptr = INVALID_GPA; } /* Emulate the VMXOFF instruction */ @@ -5090,12 +5055,12 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* - * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, * any VMREAD sets the ALU flags for VMfailInvalid. 
*/ - if (vmx->nested.current_vmptr == -1ull || + if (vmx->nested.current_vmptr == INVALID_GPA || (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) return nested_vmx_failInvalid(vcpu); /* Decode instruction info and find the field to read */ @@ -5182,12 +5147,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; /* - * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, * any VMWRITE sets the ALU flags for VMfailInvalid. */ - if (vmx->nested.current_vmptr == -1ull || + if (vmx->nested.current_vmptr == INVALID_GPA || (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) return nested_vmx_failInvalid(vcpu); if (instr_info & BIT(10)) @@ -5378,7 +5343,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) struct { u64 eptp, gpa; } operand; - int i, r; + int i, r, gpr_index; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || @@ -5391,7 +5356,8 @@ static int handle_invept(struct kvm_vcpu *vcpu) return 1; vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); + type = kvm_register_read(vcpu, gpr_index); types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; @@ -5458,7 +5424,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 gla; } operand; u16 vpid02; - int r; + int r, gpr_index; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -5471,7 +5437,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return 1; vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info); + type = kvm_register_read(vcpu, 
gpr_index); types = (vmx->nested.msrs.vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; @@ -5630,7 +5597,7 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, gpa_t bitmap, last_bitmap; u8 b; - last_bitmap = (gpa_t)-1; + last_bitmap = INVALID_GPA; b = -1; while (size > 0) { @@ -6065,7 +6032,7 @@ bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) goto reflect_vmexit; } - trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX); + trace_kvm_nested_vmexit(vcpu, KVM_ISA_VMX); /* If L0 (KVM) wants the exit, it trumps L1's desires. */ if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) @@ -6106,8 +6073,8 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, .format = KVM_STATE_NESTED_FORMAT_VMX, .size = sizeof(kvm_state), .hdr.vmx.flags = 0, - .hdr.vmx.vmxon_pa = -1ull, - .hdr.vmx.vmcs12_pa = -1ull, + .hdr.vmx.vmxon_pa = INVALID_GPA, + .hdr.vmx.vmcs12_pa = INVALID_GPA, .hdr.vmx.preemption_timer_deadline = 0, }; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = @@ -6133,7 +6100,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && - vmcs12->vmcs_link_pointer != -1ull) + vmcs12->vmcs_link_pointer != INVALID_GPA) kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12); } @@ -6209,7 +6176,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, return -EFAULT; if (nested_cpu_has_shadow_vmcs(vmcs12) && - vmcs12->vmcs_link_pointer != -1ull) { + vmcs12->vmcs_link_pointer != INVALID_GPA) { if (copy_to_user(user_vmx_nested_state->shadow_vmcs12, get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; @@ -6244,11 +6211,11 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX) return -EINVAL; - if (kvm_state->hdr.vmx.vmxon_pa == -1ull) { + if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) { if (kvm_state->hdr.vmx.smm.flags) return -EINVAL; - if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) + if 
(kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) return -EINVAL; /* @@ -6302,7 +6269,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, vmx_leave_nested(vcpu); - if (kvm_state->hdr.vmx.vmxon_pa == -1ull) + if (kvm_state->hdr.vmx.vmxon_pa == INVALID_GPA) return 0; vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa; @@ -6315,13 +6282,13 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, /* See vmx_has_valid_vmcs12. */ if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) || (kvm_state->flags & KVM_STATE_NESTED_EVMCS) || - (kvm_state->hdr.vmx.vmcs12_pa != -1ull)) + (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA)) return -EINVAL; else return 0; } - if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) { + if (kvm_state->hdr.vmx.vmcs12_pa != INVALID_GPA) { if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa || !page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa)) return -EINVAL; @@ -6366,7 +6333,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, ret = -EINVAL; if (nested_cpu_has_shadow_vmcs(vmcs12) && - vmcs12->vmcs_link_pointer != -1ull) { + vmcs12->vmcs_link_pointer != INVALID_GPA) { struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu); if (kvm_state->size < |