aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kvm/vmx/nested.c44
2 files changed, 23 insertions, 22 deletions
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d213ec5c3766..f0b0c90dd398 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -146,7 +146,6 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
-#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 118d185764ec..d125304ae2c9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2978,6 +2978,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ /*
+ * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
+ * nested early checks are disabled. In the event of a "late" VM-Fail,
+ * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
+ * software model to the pre-VMEntry host state. When EPT is disabled,
+ * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
+ * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
+ * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
+ * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
+ * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
+ * guaranteed to be overwritten with a shadow CR3 prior to re-entering
+ * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
+ * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
+ * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
+ * path would need to manually save/restore vmcs01.GUEST_CR3.
+ */
+ if (!enable_ept && !nested_early_check)
+ vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
+
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
prepare_vmcs02_early(vmx, vmcs12);
@@ -3869,18 +3888,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
nested_ept_uninit_mmu_context(vcpu);
-
- /*
- * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3
- * points to shadow pages! Fortunately we only get here after a WARN_ON
- * if EPT is disabled, so a VMabort is perfectly fine.
- */
- if (enable_ept) {
- vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
- __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
- } else {
- nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED);
- }
+ vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
/*
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
@@ -3888,7 +3897,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
* VMFail, like everything else we just need to ensure our
* software model is up-to-date.
*/
- ept_save_pdptrs(vcpu);
+ if (enable_ept)
+ ept_save_pdptrs(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -5889,14 +5899,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
{
int i;
- /*
- * Without EPT it is not possible to restore L1's CR3 and PDPTR on
- * VMfail, because they are not available in vmcs01. Just always
- * use hardware checks.
- */
- if (!enable_ept)
- nested_early_check = 1;
-
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs) {