Diffstat (limited to 'arch/x86/kvm/vmx')

 -rw-r--r--  arch/x86/kvm/vmx/capabilities.h |   6
 -rw-r--r--  arch/x86/kvm/vmx/evmcs.h        |   2
 -rw-r--r--  arch/x86/kvm/vmx/nested.c       | 375
 -rw-r--r--  arch/x86/kvm/vmx/ops.h          |  93
 -rw-r--r--  arch/x86/kvm/vmx/vmenter.S      |   4
 -rw-r--r--  arch/x86/kvm/vmx/vmx.c          | 374
 -rw-r--r--  arch/x86/kvm/vmx/vmx.h          |  11

7 files changed, 534 insertions, 331 deletions
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d6664ee3d127..7aa69716d516 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } +static inline bool vmx_waitpkg_supported(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + static inline bool cpu_has_vmx_tsc_scaling(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 39a24eec8884..07ebf6882a45 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -178,6 +178,8 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) + vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; vp_ap->enlighten_vmentry = 1; } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index a3cba321b5c5..41abc62c9a8a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -19,6 +19,14 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); static bool __read_mostly nested_early_check = 0; module_param(nested_early_check, bool, S_IRUGO); +#define CC(consistency_check) \ +({ \ + bool failed = (consistency_check); \ + if (failed) \ + trace_kvm_nested_vmenter_failed(#consistency_check, 0); \ + failed; \ +}) + /* * Hyper-V requires all of these, so mark them as supported even though * they are just treated the same as all-context. @@ -190,6 +198,16 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) pr_debug_ratelimited("kvm: nested vmx abort, indicator %d\n", indicator); } +static inline bool vmx_control_verify(u32 control, u32 low, u32 high) +{ + return fixed_bits_valid(control, low, high); +} + +static inline u64 vmx_control_msr(u32 low, u32 high) +{ + return low | ((u64)high << 32); +} + static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); @@ -430,8 +448,8 @@ static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) return 0; - if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) || - !page_address_valid(vcpu, vmcs12->io_bitmap_b)) + if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) || + CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b))) return -EINVAL; return 0; @@ -443,7 +461,7 @@ static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return 0; - if (!page_address_valid(vcpu, vmcs12->msr_bitmap)) + if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap))) return -EINVAL; return 0; @@ -455,7 +473,7 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) return 0; - if (!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)) + if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr))) return -EINVAL; return 0; @@ -688,7 +706,7 @@ static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !page_address_valid(vcpu, vmcs12->apic_access_addr)) + CC(!page_address_valid(vcpu, vmcs12->apic_access_addr))) return -EINVAL; else return 0; @@ -707,16 
+725,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * If virtualize x2apic mode is enabled, * virtualize apic access must be disabled. */ - if (nested_cpu_has_virt_x2apic_mode(vmcs12) && - nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) && + nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))) return -EINVAL; /* * If virtual interrupt delivery is enabled, * we must exit on external interrupts. */ - if (nested_cpu_has_vid(vmcs12) && - !nested_exit_on_intr(vcpu)) + if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu))) return -EINVAL; /* @@ -727,15 +744,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, * bits 5:0 of posted_intr_desc_addr should be zero. */ if (nested_cpu_has_posted_intr(vmcs12) && - (!nested_cpu_has_vid(vmcs12) || - !nested_exit_intr_ack_set(vcpu) || - (vmcs12->posted_intr_nv & 0xff00) || - (vmcs12->posted_intr_desc_addr & 0x3f) || - (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) + (CC(!nested_cpu_has_vid(vmcs12)) || + CC(!nested_exit_intr_ack_set(vcpu)) || + CC((vmcs12->posted_intr_nv & 0xff00)) || + CC((vmcs12->posted_intr_desc_addr & 0x3f)) || + CC((vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu))))) return -EINVAL; /* tpr shadow is needed by all apicv features. */ - if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) + if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))) return -EINVAL; return 0; @@ -759,10 +776,12 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_load_count, - vmcs12->vm_exit_msr_load_addr) || - nested_vmx_check_msr_switch(vcpu, vmcs12->vm_exit_msr_store_count, - vmcs12->vm_exit_msr_store_addr)) + if (CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_exit_msr_load_count, + vmcs12->vm_exit_msr_load_addr)) || + CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_exit_msr_store_count, + vmcs12->vm_exit_msr_store_addr))) return -EINVAL; return 0; @@ -771,8 +790,9 @@ static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_vmx_check_msr_switch(vcpu, vmcs12->vm_entry_msr_load_count, - vmcs12->vm_entry_msr_load_addr)) + if (CC(nested_vmx_check_msr_switch(vcpu, + vmcs12->vm_entry_msr_load_count, + vmcs12->vm_entry_msr_load_addr))) return -EINVAL; return 0; @@ -784,8 +804,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has_pml(vmcs12)) return 0; - if (!nested_cpu_has_ept(vmcs12) || - !page_address_valid(vcpu, vmcs12->pml_address)) + if (CC(!nested_cpu_has_ept(vmcs12)) || + CC(!page_address_valid(vcpu, vmcs12->pml_address))) return -EINVAL; return 0; @@ -794,8 +814,8 @@ static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && - !nested_cpu_has_ept(vmcs12)) + if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) && + !nested_cpu_has_ept(vmcs12))) return -EINVAL; return 0; } @@ -803,8 +823,8 @@ static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - if 
(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && - !nested_cpu_has_ept(vmcs12)) + if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) && + !nested_cpu_has_ept(vmcs12))) return -EINVAL; return 0; } @@ -815,8 +835,8 @@ static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu, if (!nested_cpu_has_shadow_vmcs(vmcs12)) return 0; - if (!page_address_valid(vcpu, vmcs12->vmread_bitmap) || - !page_address_valid(vcpu, vmcs12->vmwrite_bitmap)) + if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) || + CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap))) return -EINVAL; return 0; @@ -826,12 +846,12 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { /* x2APIC MSR accesses are not allowed */ - if (vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8) + if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8)) return -EINVAL; - if (e->index == MSR_IA32_UCODE_WRITE || /* SDM Table 35-2 */ - e->index == MSR_IA32_UCODE_REV) + if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */ + CC(e->index == MSR_IA32_UCODE_REV)) return -EINVAL; - if (e->reserved != 0) + if (CC(e->reserved != 0)) return -EINVAL; return 0; } @@ -839,9 +859,9 @@ static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu, static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { - if (e->index == MSR_FS_BASE || - e->index == MSR_GS_BASE || - e->index == MSR_IA32_SMM_MONITOR_CTL || /* SMM is not supported */ + if (CC(e->index == MSR_FS_BASE) || + CC(e->index == MSR_GS_BASE) || + CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */ nested_vmx_msr_check_common(vcpu, e)) return -EINVAL; return 0; @@ -850,24 +870,40 @@ static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu, static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu, struct vmx_msr_entry *e) { - if (e->index == MSR_IA32_SMBASE || /* SMM is not supported */ + if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */ nested_vmx_msr_check_common(vcpu, e)) return -EINVAL; return 0; } +static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); + + return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER; +} + /* * Load guest's/host's msr at nested entry/exit. * return 0 for success, entry index for failure. + * + * One of the failure modes for MSR load/store is when a list exceeds the + * virtual hardware's capacity. To maintain compatibility with hardware inasmuch + * as possible, process all valid entries before failing rather than precheck + * for a capacity violation. 
*/ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { u32 i; struct vmx_msr_entry e; - struct msr_data msr; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); - msr.host_initiated = false; for (i = 0; i < count; i++) { + if (unlikely(i >= max_msr_list_size)) + goto fail; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, sizeof(e))) { pr_debug_ratelimited( @@ -881,9 +917,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - msr.index = e.index; - msr.data = e.value; - if (kvm_set_msr(vcpu, &msr)) { + if (kvm_set_msr(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -897,11 +931,15 @@ fail: static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) { + u64 data; u32 i; struct vmx_msr_entry e; + u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu); for (i = 0; i < count; i++) { - struct msr_data msr_info; + if (unlikely(i >= max_msr_list_size)) + return -EINVAL; + if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e), &e, 2 * sizeof(u32))) { @@ -916,9 +954,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); return -EINVAL; } - msr_info.host_initiated = false; - msr_info.index = e.index; - if (kvm_get_msr(vcpu, &msr_info)) { + if (kvm_get_msr(vcpu, e.index, &data)) { pr_debug_ratelimited( "%s cannot read MSR (%u, 0x%x)\n", __func__, i, e.index); @@ -927,10 +963,10 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) if (kvm_vcpu_write_guest(vcpu, gpa + i * sizeof(e) + offsetof(struct vmx_msr_entry, value), - &msr_info.data, sizeof(msr_info.data))) { + &data, sizeof(data))) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", - __func__, i, e.index, msr_info.data); + __func__, i, e.index, data); return -EINVAL; } } @@ -955,7 +991,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne u32 *entry_failure_code) { if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { - if (!nested_cr3_valid(vcpu, cr3)) { + if (CC(!nested_cr3_valid(vcpu, cr3))) { *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; } @@ -965,7 +1001,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * must not be dereferenced. */ if (is_pae_paging(vcpu) && !nested_ept) { - if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { + if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { *entry_failure_code = ENTRY_FAIL_PDPTE; return -EINVAL; } @@ -1009,17 +1045,6 @@ static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) return vmx->nested.vpid02 ? 
vmx->nested.vpid02 : vmx->vpid; } - -static inline bool vmx_control_verify(u32 control, u32 low, u32 high) -{ - return fixed_bits_valid(control, low, high); -} - -static inline u64 vmx_control_msr(u32 low, u32 high) -{ - return low | ((u64)high << 32); -} - static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) { superset &= mask; @@ -2085,6 +2110,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) SECONDARY_EXEC_ENABLE_INVPCID | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_XSAVES | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_ENABLE_VMFUNC); @@ -2411,12 +2437,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) { - if (!nested_cpu_has_nmi_exiting(vmcs12) && - nested_cpu_has_virtual_nmis(vmcs12)) + if (CC(!nested_cpu_has_nmi_exiting(vmcs12) && + nested_cpu_has_virtual_nmis(vmcs12))) return -EINVAL; - if (!nested_cpu_has_virtual_nmis(vmcs12) && - nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) + if (CC(!nested_cpu_has_virtual_nmis(vmcs12) && + nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))) return -EINVAL; return 0; @@ -2430,11 +2456,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* Check for memory type validity */ switch (address & VMX_EPTP_MT_MASK) { case VMX_EPTP_MT_UC: - if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))) return false; break; case VMX_EPTP_MT_WB: - if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))) return false; break; default: @@ -2442,16 +2468,16 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) } /* only 4 levels page-walk length are valid */ - if ((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4) + if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)) return false; /* Reserved bits should not be set */ - if (address >> maxphyaddr || ((address >> 7) & 0x1f)) + if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f))) return false; /* AD, if set, should be supported */ if (address & VMX_EPTP_AD_ENABLE_BIT) { - if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) + if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))) return false; } @@ -2466,21 +2492,21 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control, - vmx->nested.msrs.pinbased_ctls_low, - vmx->nested.msrs.pinbased_ctls_high) || - !vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - vmx->nested.msrs.procbased_ctls_low, - vmx->nested.msrs.procbased_ctls_high)) + if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control, + vmx->nested.msrs.pinbased_ctls_low, + vmx->nested.msrs.pinbased_ctls_high)) || + CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, + vmx->nested.msrs.procbased_ctls_low, + vmx->nested.msrs.procbased_ctls_high))) return -EINVAL; if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && - !vmx_control_verify(vmcs12->secondary_vm_exec_control, - vmx->nested.msrs.secondary_ctls_low, - vmx->nested.msrs.secondary_ctls_high)) + CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control, + vmx->nested.msrs.secondary_ctls_low, + vmx->nested.msrs.secondary_ctls_high))) return -EINVAL; - if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu) || + if 
(CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) || nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) || nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) || nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) || @@ -2491,7 +2517,7 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) || nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) || nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) || - (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) + CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)) return -EINVAL; if (!nested_cpu_has_preemption_timer(vmcs12) && @@ -2499,17 +2525,17 @@ static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu, return -EINVAL; if (nested_cpu_has_ept(vmcs12) && - !valid_ept_address(vcpu, vmcs12->ept_pointer)) + CC(!valid_ept_address(vcpu, vmcs12->ept_pointer))) return -EINVAL; if (nested_cpu_has_vmfunc(vmcs12)) { - if (vmcs12->vm_function_control & - ~vmx->nested.msrs.vmfunc_controls) + if (CC(vmcs12->vm_function_control & + ~vmx->nested.msrs.vmfunc_controls)) return -EINVAL; if (nested_cpu_has_eptp_switching(vmcs12)) { - if (!nested_cpu_has_ept(vmcs12) || - !page_address_valid(vcpu, vmcs12->eptp_list_address)) + if (CC(!nested_cpu_has_ept(vmcs12)) || + CC(!page_address_valid(vcpu, vmcs12->eptp_list_address))) return -EINVAL; } } @@ -2525,10 +2551,10 @@ static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->vm_exit_controls, - vmx->nested.msrs.exit_ctls_low, - vmx->nested.msrs.exit_ctls_high) || - nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)) + if (CC(!vmx_control_verify(vmcs12->vm_exit_controls, + vmx->nested.msrs.exit_ctls_low, + vmx->nested.msrs.exit_ctls_high)) || + CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12))) return -EINVAL; return 0; @@ -2542,9 +2568,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!vmx_control_verify(vmcs12->vm_entry_controls, - vmx->nested.msrs.entry_ctls_low, - vmx->nested.msrs.entry_ctls_high)) + if (CC(!vmx_control_verify(vmcs12->vm_entry_controls, + vmx->nested.msrs.entry_ctls_low, + vmx->nested.msrs.entry_ctls_high))) return -EINVAL; /* @@ -2564,31 +2590,31 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; /* VM-entry interruption-info field: interruption type */ - if (intr_type == INTR_TYPE_RESERVED || - (intr_type == INTR_TYPE_OTHER_EVENT && - !nested_cpu_supports_monitor_trap_flag(vcpu))) + if (CC(intr_type == INTR_TYPE_RESERVED) || + CC(intr_type == INTR_TYPE_OTHER_EVENT && + !nested_cpu_supports_monitor_trap_flag(vcpu))) return -EINVAL; /* VM-entry interruption-info field: vector */ - if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || - (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || - (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) + if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || + CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || + CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) return -EINVAL; /* VM-entry interruption-info field: deliver error code */ should_have_error_code = intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && x86_exception_has_error_code(vector); - if (has_error_code != should_have_error_code) + if (CC(has_error_code != should_have_error_code)) return 
-EINVAL; /* VM-entry exception error code */ - if (has_error_code && - vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)) + if (CC(has_error_code && + vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))) return -EINVAL; /* VM-entry interruption-info field: reserved bits */ - if (intr_info & INTR_INFO_RESVD_BITS_MASK) + if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK)) return -EINVAL; /* VM-entry instruction length */ @@ -2596,9 +2622,9 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu, case INTR_TYPE_SOFT_EXCEPTION: case INTR_TYPE_SOFT_INTR: case INTR_TYPE_PRIV_SW_EXCEPTION: - if ((vmcs12->vm_entry_instruction_len > 15) || - (vmcs12->vm_entry_instruction_len == 0 && - !nested_cpu_has_zero_length_injection(vcpu))) + if (CC(vmcs12->vm_entry_instruction_len > 15) || + CC(vmcs12->vm_entry_instruction_len == 0 && + CC(!nested_cpu_has_zero_length_injection(vcpu)))) return -EINVAL; } } @@ -2625,40 +2651,56 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, { bool ia32e; - if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || - !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || - !nested_cr3_valid(vcpu, vmcs12->host_cr3)) + if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) || + CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) || + CC(!nested_cr3_valid(vcpu, vmcs12->host_cr3))) return -EINVAL; - if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) || - is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) + if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu))) return -EINVAL; if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) && - !kvm_pat_valid(vmcs12->host_ia32_pat)) + CC(!kvm_pat_valid(vmcs12->host_ia32_pat))) return -EINVAL; - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; - - if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || - vmcs12->host_cs_selector == 0 || - vmcs12->host_tr_selector == 0 || - (vmcs12->host_ss_selector == 0 && !ia32e)) +#ifdef CONFIG_X86_64 + ia32e = !!(vcpu->arch.efer & EFER_LMA); +#else + ia32e = false; +#endif + + if (ia32e) { + if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || + CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + return -EINVAL; + } else { + if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || + CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || + CC((vmcs12->host_rip) >> 32)) + return -EINVAL; + } + + if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) || + CC(vmcs12->host_cs_selector == 0) || + CC(vmcs12->host_tr_selector == 
0) || + CC(vmcs12->host_ss_selector == 0 && !ia32e)) return -EINVAL; #ifdef CONFIG_X86_64 - if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) || - is_noncanonical_address(vmcs12->host_gs_base, vcpu) || - is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) || - is_noncanonical_address(vmcs12->host_idtr_base, vcpu) || - is_noncanonical_address(vmcs12->host_tr_base, vcpu)) + if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) || + CC(is_noncanonical_address(vmcs12->host_rip, vcpu))) return -EINVAL; #endif @@ -2669,9 +2711,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, * the host address-space size VM-exit control. */ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { - if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) + if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) || + CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) || + CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME))) return -EINVAL; } @@ -2688,16 +2730,16 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, if (vmcs12->vmcs_link_pointer == -1ull) return 0; - if (!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)) + if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer))) return -EINVAL; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)) + if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))) return -EINVAL; shadow = map.hva; - if (shadow->hdr.revision_id != VMCS12_REVISION || - shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)) + if (CC(shadow->hdr.revision_id != VMCS12_REVISION) || + CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12))) r = -EINVAL; kvm_vcpu_unmap(vcpu, &map, false); @@ -2709,8 +2751,8 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu, */ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) { - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) + if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)) return -EINVAL; return 0; @@ -2724,12 +2766,12 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, *exit_qual = ENTRY_FAIL_DEFAULT; - if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || - !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) + if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || + CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) return -EINVAL; if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) && - !kvm_pat_valid(vmcs12->guest_ia32_pat)) + CC(!kvm_pat_valid(vmcs12->guest_ia32_pat))) return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { @@ -2749,16 +2791,16 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, if (to_vmx(vcpu)->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; - if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || - ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || - ((vmcs12->guest_cr0 & X86_CR0_PG) && - ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) + if (CC(!kvm_valid_efer(vcpu, 
vmcs12->guest_ia32_efer)) || + CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) || + CC(((vmcs12->guest_cr0 & X86_CR0_PG) && + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME)))) return -EINVAL; } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && - (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || - (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) + (CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) || + CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))) return -EINVAL; if (nested_check_guest_non_reg_state(vmcs12)) @@ -2841,9 +2883,13 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); if (vm_fail) { + u32 error = vmcs_read32(VM_INSTRUCTION_ERROR); + preempt_enable(); - WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != - VMXERR_ENTRY_INVALID_CONTROL_FIELD); + + trace_kvm_nested_vmenter_failed( + "early hardware check VM-instruction error: ", error); + WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } @@ -3401,6 +3447,15 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) unsigned long exit_qual; bool block_nested_events = vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu); + struct kvm_lapic *apic = vcpu->arch.apic; + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_INIT, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); + return 0; + } if (vcpu->arch.exception.pending && nested_vmx_check_exception(vcpu, &exit_qual)) { @@ -3889,7 +3944,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmx_msr_entry g, h; - struct msr_data msr; gpa_t gpa; u32 i, j; @@ -3949,7 +4003,6 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * from the guest value. The intent is to stuff host state as * silently as possible, not to fully process the exit load list. 
*/ - msr.host_initiated = false; for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { @@ -3979,9 +4032,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - msr.index = h.index; - msr.data = h.value; - if (kvm_set_msr(vcpu, &msr)) { + if (kvm_set_msr(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); @@ -4466,7 +4517,12 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) { if (!nested_vmx_check_permission(vcpu)) return 1; + free_nested(vcpu); + + /* Process a latched INIT during time CPU was in VMX operation */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + return nested_vmx_succeed(vcpu); } @@ -5261,8 +5317,9 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return false; if (unlikely(vmx->fail)) { - pr_info_ratelimited("%s failed vm entry %x\n", __func__, - vmcs_read32(VM_INSTRUCTION_ERROR)); + trace_kvm_nested_vmenter_failed( + "hardware VM-instruction error: ", + vmcs_read32(VM_INSTRUCTION_ERROR)); return true; } @@ -5422,6 +5479,10 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) case EXIT_REASON_ENCLS: /* SGX is never exposed to L1 */ return false; + case EXIT_REASON_UMWAIT: + case EXIT_REASON_TPAUSE: + return nested_cpu_has2(vmcs12, + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE); default: return true; } diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 2200fb698dd0..45eaedee2ac0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -11,8 +11,13 @@ #include "vmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) -#define __ex_clear(x, reg) \ - ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) + +asmlinkage void vmread_error(unsigned long field, bool fault); +void vmwrite_error(unsigned long field, unsigned long value); +void vmclear_error(struct vmcs *vmcs, u64 phys_addr); +void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); +void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); +void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); static __always_inline void vmcs_check16(unsigned long field) { @@ -62,8 +67,22 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (__ex_clear("vmread %1, %0", "%k0") - : "=r"(value) : "r"(field)); + asm volatile("1: vmread %2, %1\n\t" + ".byte 0x3e\n\t" /* branch taken hint */ + "ja 3f\n\t" + "mov %2, %%" _ASM_ARG1 "\n\t" + "xor %%" _ASM_ARG2 ", %%" _ASM_ARG2 "\n\t" + "2: call vmread_error\n\t" + "xor %k1, %k1\n\t" + "3:\n\t" + + ".pushsection .fixup, \"ax\"\n\t" + "4: mov %2, %%" _ASM_ARG1 "\n\t" + "mov $1, %%" _ASM_ARG2 "\n\t" + "jmp 2b\n\t" + ".popsection\n\t" + _ASM_EXTABLE(1b, 4b) + : ASM_CALL_CONSTRAINT, "=r"(value) : "r"(field) : "cc"); return value; } @@ -103,21 +122,39 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) return __vmcs_readl(field); } -static noinline void vmwrite_error(unsigned long field, unsigned long value) -{ - printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", - field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); - dump_stack(); -} +#define vmx_asm1(insn, op1, error_args...) 
\ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) + +#define vmx_asm2(insn, op1, op2, error_args...) \ +do { \ + asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \ + ".byte 0x2e\n\t" /* branch not taken hint */ \ + "jna %l[error]\n\t" \ + _ASM_EXTABLE(1b, %l[fault]) \ + : : op1, op2 : "cc" : error, fault); \ + return; \ +error: \ + insn##_error(error_args); \ + return; \ +fault: \ + kvm_spurious_fault(); \ +} while (0) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { - bool error; - - asm volatile (__ex("vmwrite %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(field), "rm"(value)); - if (unlikely(error)) - vmwrite_error(field, value); + vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value); } static __always_inline void vmcs_write16(unsigned long field, u16 value) @@ -182,28 +219,18 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) static inline void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; - asm volatile (__ex("vmclear %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", - vmcs, phys_addr); + vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr); } static inline void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); - bool error; if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex("vmptrld %1") CC_SET(na) - : CC_OUT(na) (error) : "m"(phys_addr)); - if (unlikely(error)) - printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", - vmcs, phys_addr); + vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr); } static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) @@ -213,11 +240,8 @@ static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; - bool error; - asm volatile (__ex("invvpid %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva); } static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) @@ -225,11 +249,8 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) struct { u64 eptp, gpa; } operand = {eptp, gpa}; - bool error; - asm volatile (__ex("invept %2, %1") CC_SET(na) - : CC_OUT(na) (error) : "r"(ext), "m"(operand)); - BUG_ON(error); + vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 4010d519eb8c..751a384c2eb0 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -94,7 +94,7 @@ ENDPROC(vmx_vmexit) /** * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode - * @vmx: struct vcpu_vmx * + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) * @regs: unsigned long * (to guest registers) * @launched: %true if the VMCS has been launched * @@ -151,7 +151,7 @@ ENTRY(__vmx_vcpu_run) mov VCPU_R14(%_ASM_AX), %r14 mov VCPU_R15(%_ASM_AX), %r15 #endif - /* Load guest RAX. This kills the vmx_vcpu pointer! */ + /* Load guest RAX. This kills the @regs pointer! 
*/ mov VCPU_RAX(%_ASM_AX), %_ASM_AX /* Enter guest mode */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c030c96fc81a..d4575ffb3cec 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -343,6 +343,48 @@ static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bit void vmx_vmexit(void); +#define vmx_insn_failed(fmt...) \ +do { \ + WARN_ONCE(1, fmt); \ + pr_warn_ratelimited(fmt); \ +} while (0) + +asmlinkage void vmread_error(unsigned long field, bool fault) +{ + if (fault) + kvm_spurious_fault(); + else + vmx_insn_failed("kvm: vmread failed: field=%lx\n", field); +} + +noinline void vmwrite_error(unsigned long field, unsigned long value) +{ + vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%d\n", + field, value, vmcs_read32(VM_INSTRUCTION_ERROR)); +} + +noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmclear failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr) +{ + vmx_insn_failed("kvm: vmptrld failed: %p/%llx\n", vmcs, phys_addr); +} + +noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva) +{ + vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n", + ext, vpid, gva); +} + +noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa) +{ + vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n", + ext, eptp, gpa); +} + static DEFINE_PER_CPU(struct vmcs *, vmxarea); DEFINE_PER_CPU(struct vmcs *, current_vmcs); /* @@ -486,6 +528,31 @@ static int hv_remote_flush_tlb(struct kvm *kvm) return hv_remote_flush_tlb_with_range(kvm, NULL); } +static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +{ + struct hv_enlightened_vmcs *evmcs; + struct hv_partition_assist_pg **p_hv_pa_pg = + &vcpu->kvm->arch.hyperv.hv_pa_pg; + /* + * Synthetic VM-Exit is not enabled in current code and so All + * evmcs in singe VM shares same assist page. + */ + if (!*p_hv_pa_pg) + *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL); + + if (!*p_hv_pa_pg) + return -ENOMEM; + + evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; + + evmcs->partition_assist_page = + __pa(*p_hv_pa_pg); + evmcs->hv_vm_id = (unsigned long)vcpu->kvm; + evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; + + return 0; +} + #endif /* IS_ENABLED(CONFIG_HYPERV) */ /* @@ -1472,17 +1539,32 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } - -static void skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - kvm_rip_write(vcpu, rip); + /* + * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on + * undefined behavior: Intel's SDM doesn't mandate the VMCS field be + * set when EPT misconfig occurs. In practice, real hardware updates + * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors + * (namely Hyper-V) don't set it due to it being undefined behavior, + * i.e. we end up advancing IP with some random value. 
+ */ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || + to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { + rip = kvm_rip_read(vcpu); + rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + kvm_rip_write(vcpu, rip); + } else { + if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) + return 0; + } /* skipping an emulated instruction also counts */ vmx_set_interrupt_shadow(vcpu, 0); + + return 1; } static void vmx_clear_hlt(struct kvm_vcpu *vcpu) @@ -1517,8 +1599,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) int inc_eip = 0; if (kvm_exception_is_soft(nr)) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, nr, inc_eip); return; } @@ -1690,6 +1771,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) #endif case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + msr_info->data = vmx->msr_ia32_umwait_control; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -1863,6 +1950,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vmcs_write64(GUEST_BNDCFGS, data); break; + case MSR_IA32_UMWAIT_CONTROL: + if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx)) + return 1; + + /* The reserved bit 1 and non-32 bit [63:32] should be zero */ + if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32))) + return 1; + + vmx->msr_ia32_umwait_control = data; + break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) @@ -2280,6 +2377,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_RDRAND_EXITING | SECONDARY_EXEC_ENABLE_PML | SECONDARY_EXEC_TSC_SCALING | + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | SECONDARY_EXEC_ENABLE_VMFUNC | @@ -4016,6 +4114,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } } + if (vmx_waitpkg_supported()) { + bool waitpkg_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG); + + if (!waitpkg_enabled) + exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + + if (nested) { + if (waitpkg_enabled) + vmx->nested.msrs.secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + else + vmx->nested.msrs.secondary_ctls_high &= + ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; + } + } + vmx->secondary_exec_control = exec_control; } @@ -4026,7 +4141,7 @@ static void ept_set_mmio_spte_mask(void) * of an EPT paging-structure entry is 110b (write/execute). 
*/ kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE); + VMX_EPT_MISCONFIG_WX_VALUE, 0); } #define VMX_XSS_EXIT_BITMAP 0 @@ -4150,8 +4265,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vmx->msr_ia32_umwait_control = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + vmx->hv_deadline_tsc = -1; kvm_set_cr8(vcpu, 0); if (!init_event) { @@ -4266,8 +4384,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) int inc_eip = 0; if (vcpu->arch.interrupt.soft) inc_eip = vcpu->arch.event_exit_inst_len; - if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, irq, inc_eip); return; } intr = irq | INTR_INFO_VALID_MASK; @@ -4303,8 +4420,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { - if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0); return; } @@ -4431,7 +4547,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, * Cause the #SS fault with 0 error code in VM86 mode. */ if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { - if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (kvm_emulate_instruction(vcpu, 0)) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; return kvm_vcpu_halt(vcpu); @@ -4482,7 +4598,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 intr_info, ex_no, error_code; unsigned long cr2, rip, dr6; u32 vect_info; - enum emulation_result er; vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; @@ -4499,13 +4614,17 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { WARN_ON_ONCE(!enable_vmware_backdoor); - er = kvm_emulate_instruction(vcpu, - EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); - if (er == EMULATE_USER_EXIT) - return 0; - else if (er != EMULATE_DONE) + + /* + * VMware backdoor emulation on #GP interception only handles + * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero + * error code on #GP. 
+ */ + if (error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); - return 1; + return 1; + } + return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP); } /* @@ -4547,7 +4666,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) - skip_emulated_instruction(vcpu); + WARN_ON(!skip_emulated_instruction(vcpu)); kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -4602,7 +4721,7 @@ static int handle_io(struct kvm_vcpu *vcpu) ++vcpu->stat.io_exits; if (string) - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -4676,7 +4795,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) static int handle_desc(struct kvm_vcpu *vcpu) { WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP)); - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_cr(struct kvm_vcpu *vcpu) @@ -4856,41 +4975,12 @@ static int handle_cpuid(struct kvm_vcpu *vcpu) static int handle_rdmsr(struct kvm_vcpu *vcpu) { - u32 ecx = kvm_rcx_read(vcpu); - struct msr_data msr_info; - - msr_info.index = ecx; - msr_info.host_initiated = false; - if (vmx_get_msr(vcpu, &msr_info)) { - trace_kvm_msr_read_ex(ecx); - kvm_inject_gp(vcpu, 0); - return 1; - } - - trace_kvm_msr_read(ecx, msr_info.data); - - kvm_rax_write(vcpu, msr_info.data & -1u); - kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u); - return kvm_skip_emulated_instruction(vcpu); + return kvm_emulate_rdmsr(vcpu); } static int handle_wrmsr(struct kvm_vcpu *vcpu) { - struct msr_data msr; - u32 ecx = kvm_rcx_read(vcpu); - u64 data = kvm_read_edx_eax(vcpu); - - msr.data = data; - msr.index = ecx; - msr.host_initiated = false; - if (kvm_set_msr(vcpu, &msr) != 0) { - trace_kvm_msr_write_ex(ecx, data); - kvm_inject_gp(vcpu, 0); - return 1; - } - - trace_kvm_msr_write(ecx, data); - return kvm_skip_emulated_instruction(vcpu); + return kvm_emulate_wrmsr(vcpu); } static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) @@ -4921,7 +5011,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu) static int handle_invd(struct kvm_vcpu *vcpu) { - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_invlpg(struct kvm_vcpu *vcpu) @@ -4955,20 +5045,6 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) return 1; } -static int handle_xsaves(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - -static int handle_xrstors(struct kvm_vcpu *vcpu) -{ - kvm_skip_emulated_instruction(vcpu); - WARN(1, "this should never happen\n"); - return 1; -} - static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { @@ -4988,7 +5064,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } } - return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE; + return kvm_emulate_instruction(vcpu, 0); } static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) @@ -5057,23 +5133,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && type != INTR_TYPE_EXT_INTR && type != INTR_TYPE_NMI_INTR)) - skip_emulated_instruction(vcpu); - - if (kvm_task_switch(vcpu, tss_selector, - type == INTR_TYPE_SOFT_INTR ? 
idt_index : -1, reason, - has_error_code, error_code) == EMULATE_FAIL) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + WARN_ON(!skip_emulated_instruction(vcpu)); /* * TODO: What about debug traps on tss switch? * Are we supposed to inject them and update dr6? */ - - return 1; + return kvm_task_switch(vcpu, tss_selector, + type == INTR_TYPE_SOFT_INTR ? idt_index : -1, + reason, has_error_code, error_code); } static int handle_ept_violation(struct kvm_vcpu *vcpu) @@ -5132,21 +5200,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu) && !kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { trace_kvm_fast_mmio(gpa); - /* - * Doing kvm_skip_emulated_instruction() depends on undefined - * behavior: Intel's manual doesn't mandate - * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG - * occurs and while on real hardware it was observed to be set, - * other hypervisors (namely Hyper-V) don't set it, we end up - * advancing IP with some random value. Disable fast mmio when - * running nested and keep it for real hardware in hope that - * VM_EXIT_INSTRUCTION_LEN will always be set correctly. - */ - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) - return kvm_skip_emulated_instruction(vcpu); - else - return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) == - EMULATE_DONE; + return kvm_skip_emulated_instruction(vcpu); } return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); @@ -5165,8 +5219,6 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - enum emulation_result err = EMULATE_DONE; - int ret = 1; bool intr_window_requested; unsigned count = 130; @@ -5187,71 +5239,67 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = kvm_emulate_instruction(vcpu, 0); - - if (err == EMULATE_USER_EXIT) { - ++vcpu->stat.mmio_exits; - ret = 0; - goto out; - } - - if (err != EMULATE_DONE) - goto emulation_error; + if (!kvm_emulate_instruction(vcpu, 0)) + return 0; if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) - goto emulation_error; + vcpu->arch.exception.pending) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_vcpu_halt(vcpu); - goto out; + return kvm_vcpu_halt(vcpu); } + /* + * Note, return 1 and not 0, vcpu_run() is responsible for + * morphing the pending signal into the proper return code. 
+ */ if (signal_pending(current)) - goto out; + return 1; + if (need_resched()) schedule(); } -out: - return ret; - -emulation_error: - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; + return 1; } static void grow_ple_window(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int old = vmx->ple_window; + unsigned int old = vmx->ple_window; vmx->ple_window = __grow_ple_window(old, ple_window, ple_window_grow, ple_window_max); - if (vmx->ple_window != old) + if (vmx->ple_window != old) { vmx->ple_window_dirty = true; - - trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } } static void shrink_ple_window(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int old = vmx->ple_window; + unsigned int old = vmx->ple_window; vmx->ple_window = __shrink_ple_window(old, ple_window, ple_window_shrink, ple_window); - if (vmx->ple_window != old) + if (vmx->ple_window != old) { vmx->ple_window_dirty = true; - - trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old); + trace_kvm_ple_window_update(vcpu->vcpu_id, + vmx->ple_window, old); + } } /* @@ -5490,6 +5538,14 @@ static int handle_encls(struct kvm_vcpu *vcpu) return 1; } +static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu) +{ + kvm_skip_emulated_instruction(vcpu); + WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x", + vmcs_read32(VM_EXIT_REASON)); + return 1; +} + /* * The exit handlers return 1 if the exit was handled fully and guest execution * may resume. Otherwise they set the kvm_run parameter to indicate what needs @@ -5541,13 +5597,15 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_INVVPID] = handle_vmx_instruction, [EXIT_REASON_RDRAND] = handle_invalid_op, [EXIT_REASON_RDSEED] = handle_invalid_op, - [EXIT_REASON_XSAVES] = handle_xsaves, - [EXIT_REASON_XRSTORS] = handle_xrstors, + [EXIT_REASON_XSAVES] = handle_unexpected_vmexit, + [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit, [EXIT_REASON_PML_FULL] = handle_pml_full, [EXIT_REASON_INVPCID] = handle_invpcid, [EXIT_REASON_VMFUNC] = handle_vmx_instruction, [EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer, [EXIT_REASON_ENCLS] = handle_encls, + [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit, + [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit, }; static const int kvm_vmx_max_exit_handlers = @@ -5887,8 +5945,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) else { vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; } } @@ -6373,6 +6436,23 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } +static void atomic_switch_umwait_control_msr(struct vcpu_vmx *vmx) +{ + u32 host_umwait_control; + + if (!vmx_has_waitpkg(vmx)) + return; + + host_umwait_control = get_umwait_control_msr(); + + if (vmx->msr_ia32_umwait_control != host_umwait_control) + add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL, + vmx->msr_ia32_umwait_control, + host_umwait_control, false); + else + clear_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL); +} + static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx 
*vmx = to_vmx(vcpu); @@ -6467,6 +6547,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_enter(vmx); atomic_switch_perf_msrs(vmx); + atomic_switch_umwait_control_msr(vmx); if (enable_preemption_timer) vmx_update_hv_timer(vcpu); @@ -6522,6 +6603,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -6589,6 +6673,7 @@ static struct kvm *vmx_vm_alloc(void) static void vmx_vm_free(struct kvm *kvm) { + kfree(kvm->arch.hyperv.hv_pa_pg); vfree(to_kvm_vmx(kvm)); } @@ -6615,6 +6700,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) unsigned long *msr_bitmap; int cpu; + BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx) return ERR_PTR(-ENOMEM); @@ -7369,10 +7457,14 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, * irqbalance to make the interrupts single-CPU. * * We will support full lowest-priority interrupt later. + * + * In addition, we can only inject generic interrupts using + * the PI mechanism, refuse to route others through it. */ kvm_set_msi_irq(kvm, e, &irq); - if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { + if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || + !kvm_irq_is_postable(&irq)) { /* * Make sure the IRTE is in remapped mode if * we don't handle it in posted mode. @@ -7474,6 +7566,11 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) return false; } +static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.vmxon; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7799,6 +7896,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .nested_enable_evmcs = NULL, .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, + .apic_init_signal_blocked = vmx_apic_init_signal_blocked, }; static void vmx_cleanup_l1d_flush(void) @@ -7835,6 +7933,7 @@ static void vmx_exit(void) if (!vp_ap) continue; + vp_ap->nested_control.features.directhypercall = 0; vp_ap->current_nested_vmcs = 0; vp_ap->enlighten_vmentry = 0; } @@ -7874,6 +7973,11 @@ static int __init vmx_init(void) pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); static_branch_enable(&enable_evmcs); } + + if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) + vmx_x86_ops.enable_direct_tlbflush + = hv_enable_direct_tlbflush; + } else { enlightened_vmcs = false; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 82d0bc3a4d52..bee16687dc0b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -14,6 +14,8 @@ extern const u32 vmx_msr_index[]; extern u64 host_efer; +extern u32 get_umwait_control_msr(void); + #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 #define MSR_TYPE_RW 3 @@ -211,6 +213,7 @@ struct vcpu_vmx { #endif u64 spec_ctrl; + u32 msr_ia32_umwait_control; u32 secondary_exec_control; @@ -253,7 +256,7 @@ struct vcpu_vmx { struct nested_vmx nested; /* Dynamic PLE window. 
*/ - int ple_window; + unsigned int ple_window; bool ple_window_dirty; bool req_immediate_exit; @@ -497,6 +500,12 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); } +static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) +{ + return vmx->secondary_exec_control & + SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; +} + void dump_vmcs(void); #endif /* __KVM_X86_VMX_H */ |
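
For readers skimming the nested.c changes above: the new CC() macro wraps each individual VM-entry consistency check so that, when a check fails, the stringified expression is reported through the trace_kvm_nested_vmenter_failed() tracepoint instead of the caller silently returning -EINVAL. What follows is a minimal, kernel-independent sketch of that pattern, not the kernel code itself: it uses a GNU C statement expression just like the patch, but printf() stands in for the tracepoint, and the fake_vmcs12 structure and its two example limits (page alignment of io_bitmap_a, a cr3_target_count cap of 4) are made up purely for illustration.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Report the stringified check on failure; printf() stands in for the
 * trace_kvm_nested_vmenter_failed() tracepoint used by the patch. */
#define CC(consistency_check)						\
({									\
	bool failed = (consistency_check);				\
	if (failed)							\
		printf("nested VM-enter failed: %s\n",			\
		       #consistency_check);				\
	failed;								\
})

/* Hypothetical stand-in for a couple of vmcs12 fields. */
struct fake_vmcs12 {
	unsigned long long io_bitmap_a;
	unsigned int cr3_target_count;
};

static int check_controls(const struct fake_vmcs12 *vmcs12)
{
	/*
	 * Each sub-condition is wrapped individually, as in the patch, so the
	 * failing check identifies itself before -EINVAL is returned.
	 */
	if (CC(vmcs12->io_bitmap_a & 0xfffULL) ||
	    CC(vmcs12->cr3_target_count > 4))
		return -EINVAL;
	return 0;
}

int main(void)
{
	struct fake_vmcs12 bad = {
		.io_bitmap_a = 0x1234,		/* not page aligned */
		.cr3_target_count = 8,
	};

	return check_controls(&bad) ? 1 : 0;
}

Because the checks are OR'ed with short-circuit evaluation, only the first failing condition is traced on a given VM-enter attempt, which matches how the wrapped checks in the nested_vmx_check_*() helpers above behave.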