From 234ff0b729ad882d20f7996591a964965647addf Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 16 Nov 2018 21:28:18 +1100 Subject: KVM: PPC: Book3S HV: Fix race between kvm_unmap_hva_range and MMU mode switch Testing has revealed an occasional crash which appears to be caused by a race between kvmppc_switch_mmu_to_hpt and kvm_unmap_hva_range_hv. The symptom is a NULL pointer dereference in __find_linux_pte() called from kvm_unmap_radix() with kvm->arch.pgtable == NULL. Looking at kvmppc_switch_mmu_to_hpt(), it does indeed clear kvm->arch.pgtable (via kvmppc_free_radix()) before setting kvm->arch.radix to NULL, and there is nothing to prevent kvm_unmap_hva_range_hv() or the other MMU callback functions from being called concurrently with kvmppc_switch_mmu_to_hpt() or kvmppc_switch_mmu_to_radix(). This patch therefore adds calls to spin_lock/unlock on the kvm->mmu_lock around the assignments to kvm->arch.radix, and makes sure that the partition-scoped radix tree or HPT is only freed after changing kvm->arch.radix. This also takes the kvm->mmu_lock in kvmppc_rmap_reset() to make sure that the clearing of each rmap array (one per memslot) doesn't happen concurrently with use of the array in the kvm_unmap_hva_range_hv() or the other MMU callbacks. Fixes: 18c3640cefc7 ("KVM: PPC: Book3S HV: Add infrastructure for running HPT guests on radix host") Cc: stable@vger.kernel.org # v4.15+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 +++ arch/powerpc/kvm/book3s_hv.c | 17 +++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c615617e78ac..a18afda3d0f0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm) srcu_idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); /* * This assumes it is acceptable to lose reference and * change bits across a reset. */ memset(memslot->arch.rmap, 0, memslot->npages * sizeof(*memslot->arch.rmap)); + spin_unlock(&kvm->mmu_lock); } srcu_read_unlock(&kvm->srcu, srcu_idx); } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a56f8413758a..ab43306c4ea1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4532,12 +4532,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); + kvmppc_rmap_reset(kvm); + kvm->arch.process_table = 0; + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 0; + spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); kvmppc_update_lpcr(kvm, LPCR_VPM1, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 0; - kvm->arch.process_table = 0; return 0; } @@ -4549,12 +4552,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) err = kvmppc_init_vm_radix(kvm); if (err) return err; - + kvmppc_rmap_reset(kvm); + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 1; + spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 1; return 0; } -- cgit From 0f6ddf34be2a2076bfa7b049c2d270fdda19fa19 Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Mon, 5 Nov 2018 09:47:17 -0500 Subject: KVM: PPC: Book3S HV: Change to use DEFINE_SHOW_ATTRIBUTE macro Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code. Signed-off-by: Yangtao Li Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xics.c | 12 +----------- arch/powerpc/kvm/book3s_xive.c | 12 +----------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index b0b2bfc2ff51..f27ee57ab46e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1015,17 +1015,7 @@ static int xics_debug_show(struct seq_file *m, void *private) return 0; } -static int xics_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xics_debug_show, inode->i_private); -} - -static const struct file_operations xics_debug_fops = { - .open = xics_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index ad4a370703d3..f78d002f0fe0 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1968,17 +1968,7 @@ static int xive_debug_show(struct seq_file *m, void *private) return 0; } -static int xive_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xive_debug_show, inode->i_private); -} - -static const struct file_operations xive_debug_fops = { - .open = xive_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { -- cgit From 6142236cd9f761d6f696f477c2669f8b0820de03 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 7 Dec 2018 12:17:03 +1100 Subject: KVM: PPC: Book3S PR: Set hflag to indicate that POWER9 supports 1T segments When booting a kvm-pr guest on a POWER9 machine the following message is observed: "qemu-system-ppc64: KVM does not support 1TiB segments which guest expects" This is because the guest is expecting to be able to use 1T segments however we don't indicate support for it. This is because we don't set the BOOK3S_HFLAG_MULTI_PGSIZE flag in the hflags in kvmppc_set_pvr_pr() on POWER9. POWER9 does indeed have support for 1T segments, so add a case for POWER9 to the switch statement to ensure it is set. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_pr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4efd65d9e828..82840160c606 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -587,6 +587,7 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: + case PVR_POWER9: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; break; -- cgit From f032b73459eed4897bcafee2b1d37a817f1bb596 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Wed, 12 Dec 2018 15:15:30 +1100 Subject: KVM: PPC: Pass change type down to memslot commit function Currently, kvm_arch_commit_memory_region() gets called with a parameter indicating what type of change is being made to the memslot, but it doesn't pass it down to the platform-specific memslot commit functions. This adds the `change' parameter to the lower-level functions so that they can use it in future. [paulus@ozlabs.org - fix book E also.] Signed-off-by: Bharata B Rao Reviewed-by: Suraj Jitindar Singh Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/book3s.c | 5 +++-- arch/powerpc/kvm/book3s_hv.c | 3 ++- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/booke.c | 3 ++- arch/powerpc/kvm/powerpc.c | 2 +- 6 files changed, 14 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9b89b1918dfc..04c5b84df83d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -224,7 +224,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, extern void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -294,7 +295,8 @@ struct kvmppc_ops { void (*commit_memory_region)(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index fd9893bc7aa1..a35fb4099094 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -830,9 +830,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { - kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ab43306c4ea1..f4fbb7b58486 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4371,7 +4371,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 82840160c606..811a3c2fb0e9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1914,7 +1914,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { return; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a9ca016da670..dbec4128bb51 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1833,7 +1833,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2869a299c4ed..6a7a6a101efd 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -696,7 +696,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - kvmppc_core_commit_memory_region(kvm, mem, old, new); + kvmppc_core_commit_memory_region(kvm, mem, old, new, change); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, -- cgit From f460f6791a0224cfb019462363caa014c3aec8d7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 12 Dec 2018 15:16:17 +1100 Subject: KVM: PPC: Book3S HV: Map single pages when doing dirty page logging For radix guests, this makes KVM map guest memory as individual pages when dirty page logging is enabled for the memslot corresponding to the guest real address. Having a separate partition-scoped PTE for each system page mapped to the guest means that we have a separate dirty bit for each page, thus making the reported dirty bitmap more accurate. Without this, if part of guest memory is backed by transparent huge pages, the dirty status is reported at a 2MB granularity rather than a 64kB (or 4kB) granularity for that part, causing userspace to have to transmit more data when migrating the guest. Signed-off-by: Paul Mackerras Reviewed-by: Suraj Jitindar Singh Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d68162ee159b..87ad35e0b138 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -683,6 +683,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte_t pte, *ptep; unsigned int shift, level; int ret; + bool large_enable; /* used to check for invalidations in progress */ mmu_seq = kvm->mmu_notifier_seq; @@ -732,12 +733,15 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte = *ptep; local_irq_enable(); + /* If we're logging dirty pages, always map single pages */ + large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES); + /* Get pte level from shift/size */ - if (shift == PUD_SHIFT && + if (large_enable && shift == PUD_SHIFT && (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE))) { level = 2; - } else if (shift == PMD_SHIFT && + } else if (large_enable && shift == PMD_SHIFT && (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE))) { level = 1; -- cgit From c43c3a8683fe624b67b91a06f1c25cd752a05b3b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 12 Dec 2018 15:16:48 +1100 Subject: KVM: PPC: Book3S HV: Cleanups - constify memslots, fix comments This adds 'const' to the declarations for the struct kvm_memory_slot pointer parameters of some functions, which will make it possible to call those functions from kvmppc_core_commit_memory_region_hv() in the next patch. This also fixes some comments about locking. Signed-off-by: Paul Mackerras Reviewed-by: Suraj Jitindar Singh Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 9 +++------ arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- arch/powerpc/kvm/book3s_64_mmu_radix.c | 9 +++++---- arch/powerpc/kvm/book3s_hv_nested.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 09f8e9ba69bc..728d2b7c9981 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -197,7 +197,8 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, unsigned long gpa, @@ -215,10 +216,6 @@ extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); -extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, - unsigned long gpa, unsigned int shift, - struct kvm_memory_slot *memslot, - unsigned int lpid); extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -242,7 +239,7 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize); extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 6d298145d564..c9cbaf02fc73 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -625,7 +625,7 @@ extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, struct rmap_nested **n_rmap); extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes); diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 87ad35e0b138..52711eb052f5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -284,7 +284,8 @@ static void kvmppc_pmd_free(pmd_t *pmdp) /* Called with kvm->mmu_lock held */ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid) { @@ -861,7 +862,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -876,7 +877,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -895,7 +896,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return ref; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 401d2ecbebc5..6d1b14bd7e65 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -723,7 +723,7 @@ static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp, /* called with kvm->mmu_lock held */ void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index a67cf1cdeda4..3b3791ed74a6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -107,7 +107,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); /* Update the dirty bitmap of a memslot */ -void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize) { unsigned long npages; -- cgit From 5af3e9d06d830d52864b39c86724dc39b463eddd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 12 Dec 2018 15:17:17 +1100 Subject: KVM: PPC: Book3S HV: Flush guest mappings when turning dirty tracking on/off This adds code to flush the partition-scoped page tables for a radix guest when dirty tracking is turned on or off for a memslot. Only the guest real addresses covered by the memslot are flushed. The reason for this is to get rid of any 2M PTEs in the partition-scoped page tables that correspond to host transparent huge pages, so that page dirtiness is tracked at a system page (4k or 64k) granularity rather than a 2M granularity. The page tables are also flushed when turning dirty tracking off so that the memslot's address space can be repopulated with THPs if possible. To do this, we add a new function kvmppc_radix_flush_memslot(). Since this does what's needed for kvmppc_core_flush_memslot_hv() on a radix guest, we now make kvmppc_core_flush_memslot_hv() call the new kvmppc_radix_flush_memslot() rather than calling kvm_unmap_radix() for each page in the memslot. This has the effect of fixing a bug in that kvmppc_core_flush_memslot_hv() was previously calling kvm_unmap_radix() without holding the kvm->mmu_lock spinlock, which is required to be held. Signed-off-by: Paul Mackerras Reviewed-by: Suraj Jitindar Singh Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 +++++---- arch/powerpc/kvm/book3s_64_mmu_radix.c | 20 ++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 17 +++++++++++++++++ 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 728d2b7c9981..f8a5ac85a7df 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -222,6 +222,8 @@ extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); /* XXX remove this export when load_last_inst() is generic */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a18afda3d0f0..6f2d2fb4e098 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -899,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, gfn = memslot->base_gfn; rmapp = memslot->arch.rmap; + if (kvm_is_radix(kvm)) { + kvmppc_radix_flush_memslot(kvm, memslot); + return; + } + for (n = memslot->npages; n; --n, ++gfn) { - if (kvm_is_radix(kvm)) { - kvm_unmap_radix(kvm, memslot, gfn); - continue; - } /* * Testing the present bit without locking is OK because * the memslot has been marked invalid already, and hence diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 52711eb052f5..d675ad92c7ad 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -958,6 +958,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, return 0; } +void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long n; + pte_t *ptep; + unsigned long gpa; + unsigned int shift; + + gpa = memslot->base_gfn << PAGE_SHIFT; + spin_lock(&kvm->mmu_lock); + for (n = memslot->npages; n; --n) { + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + if (ptep && pte_present(*ptep)) + kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, + kvm->arch.lpid); + gpa += PAGE_SIZE; + } + spin_unlock(&kvm->mmu_lock); +} + static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, int psize, int *indexp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f4fbb7b58486..074ff5b314c9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4384,6 +4384,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, */ if (npages) atomic64_inc(&kvm->arch.mmio_update); + + /* + * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels + * have already called kvm_arch_flush_shadow_memslot() to + * flush shadow mappings. For KVM_MR_CREATE we have no + * previous mappings. So the only case to handle is + * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit + * has been changed. + * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES + * to get rid of any THP PTEs in the partition-scoped page tables + * so we can track dirtiness at the page level; we flush when + * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to + * using THP PTEs. + */ + if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && + ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) + kvmppc_radix_flush_memslot(kvm, old); } /* -- cgit From 693ac10a88a2219bde553b2e8460dbec97e594e6 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:03 +1100 Subject: KVM: PPC: Book3S: Only report KVM_CAP_SPAPR_TCE_VFIO on powernv machines The kvm capability KVM_CAP_SPAPR_TCE_VFIO is used to indicate the availability of in kernel tce acceleration for vfio. However it is currently the case that this is only available on a powernv machine, not for a pseries machine. Thus make this capability dependent on having the cpu feature CPU_FTR_HVMODE. [paulus@ozlabs.org - fixed compilation for Book E.] Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6a7a6a101efd..18dd98f35ac1 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -543,8 +543,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - /* fallthrough */ + r = 1; + break; case KVM_CAP_SPAPR_TCE_VFIO: + r = !!cpu_has_feature(CPU_FTR_HVMODE); + break; case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: -- cgit From d232afebf97168487e83d1366e41d53609cd1f55 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:04 +1100 Subject: KVM: PPC: Book3S HV: Add function kvmhv_vcpu_is_radix() There exists a function kvm_is_radix() which is used to determine if a kvm instance is using the radix mmu. However this only applies to the first level (L1) guest. Add a function kvmhv_vcpu_is_radix() which can be used to determine if the current execution context of the vcpu is radix, accounting for if the vcpu is running a nested guest. Currently all nested guests must be radix but this may change in the future. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s_64.h | 13 +++++++++++++ arch/powerpc/kvm/book3s_hv_nested.c | 1 + 2 files changed, 14 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c9cbaf02fc73..34968fd2de24 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -55,6 +55,7 @@ struct kvm_nested_guest { cpumask_t need_tlb_flush; cpumask_t cpu_in_guest; short prev_cpu[NR_CPUS]; + u8 radix; /* is this nested guest radix */ }; /* @@ -150,6 +151,18 @@ static inline bool kvm_is_radix(struct kvm *kvm) return kvm->arch.radix; } +static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) +{ + bool radix; + + if (vcpu->arch.nested) + radix = vcpu->arch.nested->radix; + else + radix = kvm_is_radix(vcpu->kvm); + + return radix; +} + #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 6d1b14bd7e65..6f9e1ad60527 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -480,6 +480,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) if (shadow_lpid < 0) goto out_free2; gp->shadow_lpid = shadow_lpid; + gp->radix = 1; memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu)); -- cgit From d7b456152230fcec3e98699dc137c763199f509a Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:05 +1100 Subject: KVM: PPC: Book3S HV: Implement functions to access quadrants 1 & 2 The POWER9 radix mmu has the concept of quadrants. The quadrant number is the two high bits of the effective address and determines the fully qualified address to be used for the translation. The fully qualified address consists of the effective lpid, the effective pid and the effective address. This gives then 4 possible quadrants 0, 1, 2, and 3. When accessing these quadrants the fully qualified address is obtained as follows: Quadrant | Hypervisor | Guest -------------------------------------------------------------------------- | EA[0:1] = 0b00 | EA[0:1] = 0b00 0 | effLPID = 0 | effLPID = LPIDR | effPID = PIDR | effPID = PIDR -------------------------------------------------------------------------- | EA[0:1] = 0b01 | 1 | effLPID = LPIDR | Invalid Access | effPID = PIDR | -------------------------------------------------------------------------- | EA[0:1] = 0b10 | 2 | effLPID = LPIDR | Invalid Access | effPID = 0 | -------------------------------------------------------------------------- | EA[0:1] = 0b11 | EA[0:1] = 0b11 3 | effLPID = 0 | effLPID = LPIDR | effPID = 0 | effPID = 0 -------------------------------------------------------------------------- In the Guest; Quadrant 3 is normally used to address the operating system since this uses effPID=0 and effLPID=LPIDR, meaning the PID register doesn't need to be switched. Quadrant 0 is normally used to address user space since the effLPID and effPID are taken from the corresponding registers. In the Host; Quadrant 0 and 3 are used as above, however the effLPID is always 0 to address the host. Quadrants 1 and 2 can be used by the host to address guest memory using a guest effective address. Since the effLPID comes from the LPID register, the host loads the LPID of the guest it would like to access (and the PID of the process) and can perform accesses to a guest effective address. This means quadrant 1 can be used to address the guest user space and quadrant 2 can be used to address the guest operating system from the hypervisor, using a guest effective address. Access to the quadrants can cause a Hypervisor Data Storage Interrupt (HDSI) due to being unable to perform partition scoped translation. Previously this could only be generated from a guest and so the code path expects us to take the KVM trampoline in the interrupt handler. This is no longer the case so we modify the handler to call bad_page_fault() to check if we were expecting this fault so we can handle it gracefully and just return with an error code. In the hash mmu case we still raise an unknown exception since quadrants aren't defined for the hash mmu. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 4 ++ arch/powerpc/kernel/exceptions-64s.S | 9 ++++ arch/powerpc/kvm/book3s_64_mmu_radix.c | 97 ++++++++++++++++++++++++++++++++++ arch/powerpc/mm/fault.c | 1 + 4 files changed, 111 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f8a5ac85a7df..b25a3f18b301 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -188,6 +188,10 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); +extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, unsigned long n); +extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *from, unsigned long n); extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 89d32bb79d5e..db2691ff4c0b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + ld r4,PACA_EXGEN+EX_DAR(r13) + lwz r5,PACA_EXGEN+EX_DSISR(r13) + std r4,_DAR(r1) + std r5,_DSISR(r1) + li r5,SIGSEGV + bl bad_page_fault +MMU_FTR_SECTION_ELSE bl unknown_exception +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d675ad92c7ad..c3f85c1b60d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -29,6 +29,103 @@ */ static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; +static unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n) +{ + unsigned long quadrant, ret = n; + int old_pid, old_lpid; + bool is_load = !!to; + + /* Can't access quadrants 1 or 2 in non-HV mode */ + if (kvmhv_on_pseries()) { + /* TODO h-call */ + return -EPERM; + } + + quadrant = 1; + if (!pid) + quadrant = 2; + if (is_load) + from = (void *) (eaddr | (quadrant << 62)); + else + to = (void *) (eaddr | (quadrant << 62)); + + preempt_disable(); + + /* switch the lpid first to avoid running host with unallocated pid */ + old_lpid = mfspr(SPRN_LPID); + if (old_lpid != lpid) + mtspr(SPRN_LPID, lpid); + if (quadrant == 1) { + old_pid = mfspr(SPRN_PID); + if (old_pid != pid) + mtspr(SPRN_PID, pid); + } + isync(); + + pagefault_disable(); + if (is_load) + ret = raw_copy_from_user(to, from, n); + else + ret = raw_copy_to_user(to, from, n); + pagefault_enable(); + + /* switch the pid first to avoid running host with unallocated pid */ + if (quadrant == 1 && pid != old_pid) + mtspr(SPRN_PID, old_pid); + if (lpid != old_lpid) + mtspr(SPRN_LPID, old_lpid); + isync(); + + preempt_enable(); + + return ret; +} + +static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, void *from, unsigned long n) +{ + int lpid = vcpu->kvm->arch.lpid; + int pid = vcpu->arch.pid; + + /* This would cause a data segment intr so don't allow the access */ + if (eaddr & (0x3FFUL << 52)) + return -EINVAL; + + /* Should we be using the nested lpid */ + if (vcpu->arch.nested) + lpid = vcpu->arch.nested->shadow_lpid; + + /* If accessing quadrant 3 then pid is expected to be 0 */ + if (((eaddr >> 62) & 0x3) == 0x3) + pid = 0; + + eaddr &= ~(0xFFFUL << 52); + + return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n); +} + +long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, + unsigned long n) +{ + long ret; + + ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); + if (ret > 0) + memset(to + (n - ret), 0, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); + +long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, + unsigned long n) +{ + return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); +} +EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); + int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1697e903bbf2..2e6fb1d758c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -636,6 +636,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (TRAP(regs)) { case 0x300: case 0x380: + case 0xe00: printk(KERN_ALERT "Unable to handle kernel paging request for " "data at address 0x%08lx\n", regs->dar); break; -- cgit From dceadcf91b2e0971abe706b6d605ed25de61db0e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:06 +1100 Subject: KVM: PPC: Add load_from_eaddr and store_to_eaddr to the kvmppc_ops struct The kvmppc_ops struct is used to store function pointers to kvm implementation specific functions. Introduce two new functions load_from_eaddr and store_to_eaddr to be used to load from and store to a guest effective address respectively. Also implement these for the kvm-hv module. If we are using the radix mmu then we can call the functions to access quadrant 1 and 2. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 4 ++++ arch/powerpc/kvm/book3s_hv.c | 40 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 04c5b84df83d..eb0d79f0ca45 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -328,6 +328,10 @@ struct kvmppc_ops { unsigned long flags); void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); int (*enable_nested)(struct kvm *kvm); + int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); + int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 074ff5b314c9..cef57b68dda1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5237,6 +5237,44 @@ static int kvmhv_enable_nested(struct kvm *kvm) return 0; } +static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + +static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5277,6 +5315,8 @@ static struct kvmppc_ops kvm_ops_hv = { .get_rmmu_info = kvmhv_get_rmmu_info, .set_smt_mode = kvmhv_set_smt_mode, .enable_nested = kvmhv_enable_nested, + .load_from_eaddr = kvmhv_load_from_eaddr, + .store_to_eaddr = kvmhv_store_to_eaddr, }; static int kvm_init_subcore_bitmap(void) -- cgit From cc6929cc842cceee714c66cd55346b1a9f114a98 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:07 +1100 Subject: KVM: PPC: Update kvmppc_st and kvmppc_ld to use quadrants The functions kvmppc_st and kvmppc_ld are used to access guest memory from the host using a guest effective address. They do so by translating through the process table to obtain a guest real address and then using kvm_read_guest or kvm_write_guest to make the access with the guest real address. This method of access however only works for L1 guests and will give the incorrect results for a nested guest. We can however use the store_to_eaddr and load_from_eaddr kvmppc_ops to perform the access for a nested guesti (and a L1 guest). So attempt this method first and fall back to the old method if this fails and we aren't running a nested guest. At this stage there is no fall back method to perform the access for a nested guest and this is left as a future improvement. For now we will return to the nested guest and rely on the fact that a translation should be faulted in before retrying the access. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 18dd98f35ac1..ec9cbf9db364 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int r; + int r = -EINVAL; vcpu->stat.st++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr) + r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr, + size); + + if ((!r) || (r == -EAGAIN)) + return r; + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_WRITE, &pte); if (r < 0) @@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int rc; + int rc = -EINVAL; vcpu->stat.ld++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr) + rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr, + size); + + if ((!rc) || (rc == -EAGAIN)) + return rc; + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_READ, &pte); if (rc) -- cgit From 873db2cd9a6d7f017d8f4c637cf4166c038c27d6 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:08 +1100 Subject: KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L2 guest Allow for a device which is being emulated at L0 (the host) for an L1 guest to be passed through to a nested (L2) guest. The existing kvmppc_hv_emulate_mmio function can be used here. The main challenge is that for a load the result must be stored into the L2 gpr, not an L1 gpr as would normally be the case after going out to qemu to complete the operation. This presents a challenge as at this point the L2 gpr state has been written back into L1 memory. To work around this we store the address in L1 memory of the L2 gpr where the result of the load is to be stored and use the new io_gpr value KVM_MMIO_REG_NESTED_GPR to indicate that this is a nested load for which completion must be done when returning back into the kernel. Then in kvmppc_complete_mmio_load() the resultant value is written into L1 memory at the location of the indicated L2 gpr. Note that we don't currently let an L1 guest emulate a device for an L2 guest which is then passed through to an L3 guest. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/include/asm/kvm_host.h | 3 +++ arch/powerpc/kvm/book3s_hv.c | 12 ++++++---- arch/powerpc/kvm/book3s_hv_nested.c | 43 ++++++++++++++++++++++++++++++----- arch/powerpc/kvm/powerpc.c | 8 +++++++ 5 files changed, 57 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b25a3f18b301..616b28802a19 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -310,7 +310,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu, void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fac6f631ed29..7a2483a139cf 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -793,6 +793,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; u32 nested_vcpu_id; + gpa_t nested_io_gpr; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING @@ -827,6 +828,8 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x00c0 #define KVM_MMIO_REG_VSX 0x0100 #define KVM_MMIO_REG_VMX 0x0180 +#define KVM_MMIO_REG_NESTED_GPR 0xffc0 + #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cef57b68dda1..ca7de15f4a70 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -985,6 +985,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, 3, 0); vcpu->arch.hcall_needed = 0; return -EINTR; + } else if (ret == H_TOO_HARD) { + kvmppc_set_gpr(vcpu, 3, 0); + vcpu->arch.hcall_needed = 0; + return RESUME_HOST; } break; case H_TLB_INVALIDATE: @@ -1336,7 +1340,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) +static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1394,7 +1398,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: @@ -1404,7 +1408,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; @@ -4059,7 +4063,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, if (!nested) r = kvmppc_handle_exit_hv(kvm_run, vcpu, current); else - r = kvmppc_handle_nested_exit(vcpu); + r = kvmppc_handle_nested_exit(kvm_run, vcpu); } vcpu->arch.ret = r; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 6f9e1ad60527..aeb0bb2075fa 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -195,6 +195,26 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, vcpu->arch.ppr = hr->ppr; } +static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr) +{ + /* No need to reflect the page fault to L1, we've handled it */ + vcpu->arch.trap = 0; + + /* + * Since the L2 gprs have already been written back into L1 memory when + * we complete the mmio, store the L1 memory location of the L2 gpr + * being loaded into by the mmio so that the loaded value can be + * written there in kvmppc_complete_mmio_load() + */ + if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR) + && (vcpu->mmio_is_write == 0)) { + vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr + + offsetof(struct pt_regs, + gpr[vcpu->arch.io_gpr]); + vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR; + } +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -316,6 +336,11 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (r == -EINTR) return H_INTERRUPT; + if (vcpu->mmio_needed) { + kvmhv_nested_mmio_needed(vcpu, regs_ptr); + return H_TOO_HARD; + } + return vcpu->arch.trap; } @@ -1100,7 +1125,8 @@ static inline int kvmppc_radix_shift_to_level(int shift) } /* called with gp->tlb_lock held */ -static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, +static long int __kvmhv_nested_page_fault(struct kvm_run *run, + struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp) { struct kvm *kvm = vcpu->kvm; @@ -1181,9 +1207,14 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, kvmppc_core_queue_data_storage(vcpu, ea, dsisr); return RESUME_GUEST; } - /* passthrough of emulated MMIO case... */ - pr_err("emulated MMIO passthrough?\n"); - return -EINVAL; + + /* passthrough of emulated MMIO case */ + if (kvmhv_on_pseries()) { + pr_err("emulated MMIO passthrough?\n"); + return -EINVAL; + } + + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { if (writing) { @@ -1265,13 +1296,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, return RESUME_GUEST; } -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *gp = vcpu->arch.nested; long int ret; mutex_lock(&gp->tlb_lock); - ret = __kvmhv_nested_page_fault(vcpu, gp); + ret = __kvmhv_nested_page_fault(run, vcpu, gp); mutex_unlock(&gp->tlb_lock); return ret; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ec9cbf9db364..db61096ffcf7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1208,6 +1208,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, KVMPPC_VMX_COPY_BYTE) kvmppc_set_vmx_byte(vcpu, gpr); break; +#endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_MMIO_REG_NESTED_GPR: + if (kvmppc_need_byteswap(vcpu)) + gpr = swab64(gpr); + kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr, + sizeof(gpr)); + break; #endif default: BUG(); -- cgit From 6ff887b8bd0d820d9f3371c0ce093d96b73035d6 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:09 +1100 Subject: KVM: PPC: Book3S: Introduce new hcall H_COPY_TOFROM_GUEST to access quadrants 1 & 2 A guest cannot access quadrants 1 or 2 as this would result in an exception. Thus introduce the hcall H_COPY_TOFROM_GUEST to be used by a guest when it wants to perform an access to quadrants 1 or 2, for example when it wants to access memory for one of its nested guests. Also provide an implementation for the kvm-hv module. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hvcall.h | 1 + arch/powerpc/include/asm/kvm_book3s.h | 4 ++ arch/powerpc/kvm/book3s_64_mmu_radix.c | 7 ++-- arch/powerpc/kvm/book3s_hv.c | 6 ++- arch/powerpc/kvm/book3s_hv_nested.c | 75 ++++++++++++++++++++++++++++++++++ 5 files changed, 89 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 33a4fc891947..463c63a9fcf1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -335,6 +335,7 @@ #define H_SET_PARTITION_TABLE 0xF800 #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 +#define H_COPY_TOFROM_GUEST 0xF80C /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 616b28802a19..82a0a9f3c39c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -188,6 +188,9 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); +extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n); extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, unsigned long n); extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -301,6 +304,7 @@ long kvmhv_nested_init(void); void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index c3f85c1b60d6..5b3f266422d6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -29,9 +29,9 @@ */ static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; -static unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, - gva_t eaddr, void *to, void *from, - unsigned long n) +unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n) { unsigned long quadrant, ret = n; int old_pid, old_lpid; @@ -82,6 +82,7 @@ static unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, return ret; } +EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, void *from, unsigned long n) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ca7de15f4a70..5a066fc299e1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -996,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (nesting_enabled(vcpu->kvm)) ret = kvmhv_do_nested_tlbie(vcpu); break; - + case H_COPY_TOFROM_GUEST: + ret = H_FUNCTION; + if (nesting_enabled(vcpu->kvm)) + ret = kvmhv_copy_tofrom_guest_nested(vcpu); + break; default: return RESUME_HOST; } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index aeb0bb2075fa..e41a4d196a91 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -461,6 +461,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu) return ret; } +/* + * Handle the H_COPY_TOFROM_GUEST hcall. + * r4 = L1 lpid of nested guest + * r5 = pid + * r6 = eaddr to access + * r7 = to buffer (L1 gpa) + * r8 = from buffer (L1 gpa) + * r9 = n bytes to copy + */ +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) +{ + struct kvm_nested_guest *gp; + int l1_lpid = kvmppc_get_gpr(vcpu, 4); + int pid = kvmppc_get_gpr(vcpu, 5); + gva_t eaddr = kvmppc_get_gpr(vcpu, 6); + gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7); + gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8); + void *buf; + unsigned long n = kvmppc_get_gpr(vcpu, 9); + bool is_load = !!gp_to; + long rc; + + if (gp_to && gp_from) /* One must be NULL to determine the direction */ + return H_PARAMETER; + + if (eaddr & (0xFFFUL << 52)) + return H_PARAMETER; + + buf = kzalloc(n, GFP_KERNEL); + if (!buf) + return H_NO_MEM; + + gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false); + if (!gp) { + rc = H_PARAMETER; + goto out_free; + } + + mutex_lock(&gp->tlb_lock); + + if (is_load) { + /* Load from the nested guest into our buffer */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, buf, NULL, n); + if (rc) + goto not_found; + + /* Write what was loaded into our buffer back to the L1 guest */ + rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + if (rc) + goto not_found; + } else { + /* Load the data to be stored from the L1 guest into our buf */ + rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + if (rc) + goto not_found; + + /* Store from our buffer into the nested guest */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, NULL, buf, n); + if (rc) + goto not_found; + } + +out_unlock: + mutex_unlock(&gp->tlb_lock); + kvmhv_put_nested(gp); +out_free: + kfree(buf); + return rc; +not_found: + rc = H_NOT_FOUND; + goto out_unlock; +} + /* * Reload the partition table entry for a guest. * Caller must hold gp->tlb_lock. -- cgit From 95d386c2d2e7660a6447df1507a9845665dab7d8 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Dec 2018 16:29:10 +1100 Subject: KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L3 guest Previously when a device was being emulated by an L1 guest for an L2 guest, that device couldn't then be passed through to an L3 guest. This was because the L1 guest had no method for accessing L3 memory. The hcall H_COPY_TOFROM_GUEST provides this access. Thus this setup for passthrough can now be allowed. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 9 ++++----- arch/powerpc/kvm/book3s_hv_nested.c | 5 ----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5b3f266422d6..870ef9d5eee6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -37,11 +37,10 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, int old_pid, old_lpid; bool is_load = !!to; - /* Can't access quadrants 1 or 2 in non-HV mode */ - if (kvmhv_on_pseries()) { - /* TODO h-call */ - return -EPERM; - } + /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ + if (kvmhv_on_pseries()) + return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, + __pa(to), __pa(from), n); quadrant = 1; if (!pid) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index e41a4d196a91..9dce4b9c1d9c 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -1284,11 +1284,6 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, } /* passthrough of emulated MMIO case */ - if (kvmhv_on_pseries()) { - pr_err("emulated MMIO passthrough?\n"); - return -EINVAL; - } - return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { -- cgit