diff options
author | Sean Christopherson <seanjc@google.com> | 2023-02-02 18:28:15 +0000 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2023-03-14 10:28:56 -0400 |
commit | 258d985f6eb360c9c7aacd025d0dbc080a59423f (patch) | |
tree | f523f350cf4795a806f4b9cc1e49c12128959ab5 /arch/x86/kvm/mmu | |
parent | f3e707413dbe3920a972d0c2b51175180e7de36b (diff) |
KVM: x86/mmu: Use EMULTYPE flag to track write #PFs to shadow pages
Use a new EMULTYPE flag, EMULTYPE_WRITE_PF_TO_SP, to track page faults
on self-changing writes to shadowed page tables instead of propagating
that information to the emulator via a semi-persistent vCPU flag. Using
a flag in "struct kvm_vcpu_arch" is confusing, especially as implemented,
as it's not at all obvious that clearing the flag only when emulation
actually occurs is correct.
E.g. if KVM sets the flag and then retries the fault without ever getting
to the emulator, the flag will be left set for future calls into the
emulator. But because the flag is consumed if and only if both
EMULTYPE_PF and EMULTYPE_ALLOW_RETRY_PF are set, and because
EMULTYPE_ALLOW_RETRY_PF is deliberately not set for direct MMUs, emulated
MMIO, or while L2 is active, KVM avoids false positives on a stale flag
since FNAME(page_fault) is guaranteed to be run and refresh the flag
before it's ultimately consumed by the tail end of reexecute_instruction().
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230202182817.407394-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/mmu')
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu_internal.h | 12 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/paging_tmpl.h | 4 |
3 files changed, 15 insertions, 6 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c8ebe542c565..144c5a01cd77 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4203,7 +4203,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu)) return; - kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true); + kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL); } static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) @@ -5664,7 +5664,8 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err if (r == RET_PF_INVALID) { r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa, - lower_32_bits(error_code), false); + lower_32_bits(error_code), false, + &emulation_type); if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm)) return -EIO; } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index cc58631e2336..2cbb155c686c 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -240,6 +240,13 @@ struct kvm_page_fault { kvm_pfn_t pfn; hva_t hva; bool map_writable; + + /* + * Indicates the guest is trying to write a gfn that contains one or + * more of the PTEs used to translate the write itself, i.e. the access + * is changing its own translation in the guest page tables. + */ + bool write_fault_to_shadow_pgtable; }; int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); @@ -273,7 +280,7 @@ enum { }; static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, - u32 err, bool prefetch) + u32 err, bool prefetch, int *emulation_type) { struct kvm_page_fault fault = { .addr = cr2_or_gpa, @@ -312,6 +319,9 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, else r = vcpu->arch.mmu->page_fault(vcpu, &fault); + if (fault.write_fault_to_shadow_pgtable && emulation_type) + *emulation_type |= EMULTYPE_WRITE_PF_TO_SP; + /* * Similar to above, prefetch faults aren't truly spurious, and the * async #PF path doesn't do emulation. Do count faults that are fixed diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 57f0b75c80f9..5d2958299b4f 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -825,10 +825,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault if (r) return r; - vcpu->arch.write_fault_to_shadow_pgtable = false; - is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, - &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable); + &walker, fault->user, &fault->write_fault_to_shadow_pgtable); if (is_self_change_mapping) fault->max_level = PG_LEVEL_4K; |