KVM: x86/mmu: Use EMULTYPE flag to track write #PFs to shadow pages

Use a new EMULTYPE flag, EMULTYPE_WRITE_PF_TO_SP, to track page faults on self-changing writes to shadowed page tables instead of propagating that information to the emulator via a semi-persistent vCPU flag. Using a flag in "struct kvm_vcpu_arch" is confusing, especially as implemented, as it's not at all obvious that clearing the flag only when emulation actually occurs is correct. E.g. if KVM sets the flag and then retries the fault without ever getting to the emulator, the flag will be left set for future calls into the emulator. But because the flag is consumed if and only if both EMULTYPE_PF and EMULTYPE_ALLOW_RETRY_PF are set, and because EMULTYPE_ALLOW_RETRY_PF is deliberately not set for direct MMUs, emulated MMIO, or while L2 is active, KVM avoids false positives on a stale flag since FNAME(page_fault) is guaranteed to be run and refresh the flag before it's ultimately consumed by the tail end of reexecute_instruction(). Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20230202182817.407394-2-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Sean Christopherson <seanjc@google.com> 2023-02-02 18:28:15 +0000
committer: Paolo Bonzini <pbonzini@redhat.com> 2023-03-14 10:28:56 -0400
commit: 258d985f6eb360c9c7aacd025d0dbc080a59423f (patch)
tree: f523f350cf4795a806f4b9cc1e49c12128959ab5 /arch/x86/kvm/mmu
parent: f3e707413dbe3920a972d0c2b51175180e7de36b (diff)
3 files changed, 15 insertions, 6 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c8ebe542c565..144c5a01cd77 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4203,7 +4203,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	      work->arch.cr3 != vcpu->arch.mmu->get_guest_pgd(vcpu))
 		return;
 
-	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true);
+	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
 }
 
 static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -5664,7 +5664,8 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 
 	if (r == RET_PF_INVALID) {
 		r = kvm_mmu_do_page_fault(vcpu, cr2_or_gpa,
-					  lower_32_bits(error_code), false);
+					  lower_32_bits(error_code), false,
+					  &emulation_type);
 		if (KVM_BUG_ON(r == RET_PF_INVALID, vcpu->kvm))
 			return -EIO;
 	}
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index cc58631e2336..2cbb155c686c 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -240,6 +240,13 @@ struct kvm_page_fault {
 	kvm_pfn_t pfn;
 	hva_t hva;
 	bool map_writable;
+
+	/*
+	 * Indicates the guest is trying to write a gfn that contains one or
+	 * more of the PTEs used to translate the write itself, i.e. the access
+	 * is changing its own translation in the guest page tables.
+	 */
+	bool write_fault_to_shadow_pgtable;
 };
 
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
@@ -273,7 +280,7 @@ enum {
 };
 
 static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-					u32 err, bool prefetch)
+					u32 err, bool prefetch, int *emulation_type)
 {
 	struct kvm_page_fault fault = {
 		.addr = cr2_or_gpa,
@@ -312,6 +319,9 @@ static inline int kvm_mmu_do_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	else
 		r = vcpu->arch.mmu->page_fault(vcpu, &fault);
 
+	if (fault.write_fault_to_shadow_pgtable && emulation_type)
+		*emulation_type |= EMULTYPE_WRITE_PF_TO_SP;
+
 	/*
 	 * Similar to above, prefetch faults aren't truly spurious, and the
 	 * async #PF path doesn't do emulation.  Do count faults that are fixed
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 57f0b75c80f9..5d2958299b4f 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -825,10 +825,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 	if (r)
 		return r;
 
-	vcpu->arch.write_fault_to_shadow_pgtable = false;
-
 	is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu,
-	      &walker, fault->user, &vcpu->arch.write_fault_to_shadow_pgtable);
+	      &walker, fault->user, &fault->write_fault_to_shadow_pgtable);
 
 	if (is_self_change_mapping)
 		fault->max_level = PG_LEVEL_4K;
author	Sean Christopherson <seanjc@google.com>	2023-02-02 18:28:15 +0000
committer	Paolo Bonzini <pbonzini@redhat.com>	2023-03-14 10:28:56 -0400
commit	258d985f6eb360c9c7aacd025d0dbc080a59423f (patch)
tree	f523f350cf4795a806f4b9cc1e49c12128959ab5 /arch/x86/kvm/mmu
parent	f3e707413dbe3920a972d0c2b51175180e7de36b (diff)