diff options
-rw-r--r-- | arch/powerpc/include/asm/kvm_book3s_asm.h | 3 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 10 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_ppc.h | 5 | ||||
-rw-r--r-- | arch/powerpc/include/asm/reg.h | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 40 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 59 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 137 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_p9_entry.c | 15 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_xics.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_uvmem.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 11 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500mc.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 30 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 3 |
15 files changed, 200 insertions, 135 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index b6d31bff5209..c8882d9b86c2 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -14,9 +14,6 @@ #define XICS_MFRR 0xc #define XICS_IPI 2 /* interrupt source # for IPIs */ -/* LPIDs we support with this build -- runtime limit may be lower */ -#define KVMPPC_NR_LPIDS (LPID_RSVD + 1) - /* Maximum number of threads per physical core */ #define MAX_SMT_THREADS 8 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index faf301d0dec0..2909a88acd16 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -36,7 +36,12 @@ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE #include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */ #define KVM_MAX_VCPU_IDS (MAX_SMT_THREADS * KVM_MAX_VCORES) -#define KVM_MAX_NESTED_GUESTS KVMPPC_NR_LPIDS + +/* + * Limit the nested partition table to 4096 entries (because that's what + * hardware supports). Both guest and host use this value. + */ +#define KVM_MAX_NESTED_GUESTS_SHIFT 12 #else #define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS @@ -327,8 +332,7 @@ struct kvm_arch { struct list_head uvmem_pfns; struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */ u64 l1_ptcr; - int max_nested_lpid; - struct kvm_nested_guest *nested_guests[KVM_MAX_NESTED_GUESTS]; + struct idr kvm_nested_guest_idr; /* This array can grow quite large, keep it at the end */ struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; #endif diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 838d4cb460b7..2f80191e5437 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -685,7 +685,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu); extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu); -extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu); +extern bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu); static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) { @@ -723,7 +723,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir int level, bool line_status) { return -ENODEV; } static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { } -static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { } +static inline bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { return true; } static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu) { return 0; } @@ -877,7 +877,6 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, struct kvm_dirty_tlb *cfg); long kvmppc_alloc_lpid(void); -void kvmppc_claim_lpid(long lpid); void kvmppc_free_lpid(long lpid); void kvmppc_init_lpid(unsigned long nr_lpids); diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 2835f6363228..1e8b2e04e626 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -417,7 +417,6 @@ #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */ #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ -#define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG) #define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) @@ -474,8 +473,6 @@ #ifndef SPRN_LPID #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif -#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */ -#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */ #define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ #define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ #define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 0aeb51738ca9..c036b1a22b00 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -256,26 +256,34 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, int kvmppc_mmu_hv_init(void) { - unsigned long host_lpid, rsvd_lpid; + unsigned long nr_lpids; if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE)) return -EINVAL; - host_lpid = 0; - if (cpu_has_feature(CPU_FTR_HVMODE)) - host_lpid = mfspr(SPRN_LPID); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (WARN_ON(mfspr(SPRN_LPID) != 0)) + return -EINVAL; + nr_lpids = 1UL << mmu_lpid_bits; + } else { + nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT; + } - /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */ - if (cpu_has_feature(CPU_FTR_ARCH_207S)) - rsvd_lpid = LPID_RSVD; - else - rsvd_lpid = LPID_RSVD_POWER7; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + WARN_ON(nr_lpids != 1UL << 12); + else + WARN_ON(nr_lpids != 1UL << 10); - kvmppc_init_lpid(rsvd_lpid + 1); + /* + * Reserve the last implemented LPID use in partition + * switching for POWER7 and POWER8. + */ + nr_lpids -= 1; + } - kvmppc_claim_lpid(host_lpid); - /* rsvd_lpid is reserved for use in partition switching */ - kvmppc_claim_lpid(rsvd_lpid); + kvmppc_init_lpid(nr_lpids); return 0; } @@ -879,7 +887,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, struct revmap_entry *rev = kvm->arch.hpt.rev; unsigned long head, i, j; __be64 *hptep; - int ret = 0; + bool ret = false; unsigned long *rmapp; rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; @@ -887,7 +895,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, lock_rmap(rmapp); if (*rmapp & KVMPPC_RMAP_REFERENCED) { *rmapp &= ~KVMPPC_RMAP_REFERENCED; - ret = 1; + ret = true; } if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { unlock_rmap(rmapp); @@ -919,7 +927,7 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, rev[i].guest_rpte |= HPTE_R_R; note_hpte_modification(kvm, &rev[i]); } - ret = 1; + ret = true; } __unlock_hpte(hptep, be64_to_cpu(hptep[0])); } while ((i = j) != head); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6fa518f6501d..c5fd9b4657dd 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2834,7 +2834,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) * to trap and then we emulate them. */ vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB | - HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; + HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -3967,6 +3967,7 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns kvmhv_save_hv_regs(vcpu, &hvregs); hvregs.lpcr = lpcr; + hvregs.amor = ~0; vcpu->arch.regs.msr = vcpu->arch.shregs.msr; hvregs.version = HV_GUEST_STATE_VERSION; if (vcpu->arch.nested) { @@ -4029,6 +4030,8 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb) { + struct kvm *kvm = vcpu->kvm; + struct kvm_nested_guest *nested = vcpu->arch.nested; u64 next_timer; int trap; @@ -4048,34 +4051,61 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); /* H_CEDE has to be handled now, not later */ - if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && + if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { kvmppc_cede(vcpu); kvmppc_set_gpr(vcpu, 3, 0); trap = 0; } - } else { - struct kvm *kvm = vcpu->kvm; + } else if (nested) { + __this_cpu_write(cpu_in_guest, kvm); + trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); + __this_cpu_write(cpu_in_guest, NULL); + } else { kvmppc_xive_push_vcpu(vcpu); __this_cpu_write(cpu_in_guest, kvm); trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb); __this_cpu_write(cpu_in_guest, NULL); - if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && + if (trap == BOOK3S_INTERRUPT_SYSCALL && !(vcpu->arch.shregs.msr & MSR_PR)) { unsigned long req = kvmppc_get_gpr(vcpu, 3); - /* H_CEDE has to be handled now, not later */ + /* + * XIVE rearm and XICS hcalls must be handled + * before xive context is pulled (is this + * true?) + */ if (req == H_CEDE) { + /* H_CEDE has to be handled now */ kvmppc_cede(vcpu); - kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */ + if (!kvmppc_xive_rearm_escalation(vcpu)) { + /* + * Pending escalation so abort + * the cede. + */ + vcpu->arch.ceded = 0; + } kvmppc_set_gpr(vcpu, 3, 0); trap = 0; - /* XICS hcalls must be handled before xive is pulled */ + } else if (req == H_ENTER_NESTED) { + /* + * L2 should not run with the L1 + * context so rearm and pull it. + */ + if (!kvmppc_xive_rearm_escalation(vcpu)) { + /* + * Pending escalation so abort + * H_ENTER_NESTED. + */ + kvmppc_set_gpr(vcpu, 3, 0); + trap = 0; + } + } else if (hcall_is_xics(req)) { int ret; @@ -4519,9 +4549,14 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (!nested) { kvmppc_core_prepare_to_enter(vcpu); - if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, - &vcpu->arch.pending_exceptions)) + if (vcpu->arch.shregs.msr & MSR_EE) { + if (xive_interrupt_pending(vcpu)) + kvmppc_inject_interrupt_hv(vcpu, + BOOK3S_INTERRUPT_EXTERNAL, 0); + } else if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, + &vcpu->arch.pending_exceptions)) { lpcr |= LPCR_MER; + } } else if (vcpu->arch.pending_exceptions || vcpu->arch.doorbell_request || xive_interrupt_pending(vcpu)) { @@ -5283,6 +5318,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); lpcr &= LPCR_PECE | LPCR_LPES; } else { + /* + * The L2 LPES mode will be set by the L0 according to whether + * or not it needs to take external interrupts in HV mode. + */ lpcr = 0; } lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 9d373f8963ee..ce08573fc58f 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -261,8 +261,7 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu, /* * Don't let L1 change LPCR bits for the L2 except these: */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER; /* * Additional filtering is required depending on hardware @@ -439,10 +438,11 @@ long kvmhv_nested_init(void) if (!radix_enabled()) return -ENODEV; - /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */ - ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1; - if (ptb_order < 8) - ptb_order = 8; + /* Partition table entry is 1<<4 bytes in size, hence the 4. */ + ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4; + /* Minimum partition table size is 1<<12 bytes */ + if (ptb_order < 12) + ptb_order = 12; pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order, GFP_KERNEL); if (!pseries_partition_tb) { @@ -450,7 +450,7 @@ long kvmhv_nested_init(void) return -ENOMEM; } - ptcr = __pa(pseries_partition_tb) | (ptb_order - 8); + ptcr = __pa(pseries_partition_tb) | (ptb_order - 12); rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr); if (rc != H_SUCCESS) { pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n", @@ -521,11 +521,6 @@ static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp) kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table); } -void kvmhv_vm_nested_init(struct kvm *kvm) -{ - kvm->arch.max_nested_lpid = -1; -} - /* * Handle the H_SET_PARTITION_TABLE hcall. * r4 = guest real address of partition table + log_2(size) - 12 @@ -539,16 +534,14 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu) long ret = H_SUCCESS; srcu_idx = srcu_read_lock(&kvm->srcu); - /* - * Limit the partition table to 4096 entries (because that's what - * hardware supports), and check the base address. - */ - if ((ptcr & PRTS_MASK) > 12 - 8 || + /* Check partition size and base address. */ + if ((ptcr & PRTS_MASK) + 12 - 4 > KVM_MAX_NESTED_GUESTS_SHIFT || !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT)) ret = H_PARAMETER; srcu_read_unlock(&kvm->srcu, srcu_idx); if (ret == H_SUCCESS) kvm->arch.l1_ptcr = ptcr; + return ret; } @@ -644,7 +637,7 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) ret = -EFAULT; ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) { int srcu_idx = srcu_read_lock(&kvm->srcu); ret = kvm_read_guest(kvm, ptbl_addr, &ptbl_entry, sizeof(ptbl_entry)); @@ -660,6 +653,35 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp) kvmhv_set_nested_ptbl(gp); } +void kvmhv_vm_nested_init(struct kvm *kvm) +{ + idr_init(&kvm->arch.kvm_nested_guest_idr); +} + +static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid) +{ + return idr_find(&kvm->arch.kvm_nested_guest_idr, lpid); +} + +static bool __prealloc_nested(struct kvm *kvm, int lpid) +{ + if (idr_alloc(&kvm->arch.kvm_nested_guest_idr, + NULL, lpid, lpid + 1, GFP_KERNEL) != lpid) + return false; + return true; +} + +static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp) +{ + if (idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid)) + WARN_ON(1); +} + +static void __remove_nested(struct kvm *kvm, int lpid) +{ + idr_remove(&kvm->arch.kvm_nested_guest_idr, lpid); +} + static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) { struct kvm_nested_guest *gp; @@ -720,13 +742,8 @@ static void kvmhv_remove_nested(struct kvm_nested_guest *gp) long ref; spin_lock(&kvm->mmu_lock); - if (gp == kvm->arch.nested_guests[lpid]) { - kvm->arch.nested_guests[lpid] = NULL; - if (lpid == kvm->arch.max_nested_lpid) { - while (--lpid >= 0 && !kvm->arch.nested_guests[lpid]) - ; - kvm->arch.max_nested_lpid = lpid; - } + if (gp == __find_nested(kvm, lpid)) { + __remove_nested(kvm, lpid); --gp->refcnt; } ref = gp->refcnt; @@ -743,24 +760,22 @@ static void kvmhv_remove_nested(struct kvm_nested_guest *gp) */ void kvmhv_release_all_nested(struct kvm *kvm) { - int i; + int lpid; struct kvm_nested_guest *gp; struct kvm_nested_guest *freelist = NULL; struct kvm_memory_slot *memslot; int srcu_idx, bkt; spin_lock(&kvm->mmu_lock); - for (i = 0; i <= kvm->arch.max_nested_lpid; i++) { - gp = kvm->arch.nested_guests[i]; - if (!gp) - continue; - kvm->arch.nested_guests[i] = NULL; + idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) { + __remove_nested(kvm, lpid); if (--gp->refcnt == 0) { gp->next = freelist; freelist = gp; } } - kvm->arch.max_nested_lpid = -1; + idr_destroy(&kvm->arch.kvm_nested_guest_idr); + /* idr is empty and may be reused at this point */ spin_unlock(&kvm->mmu_lock); while ((gp = freelist) != NULL) { freelist = gp->next; @@ -792,12 +807,11 @@ struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid, { struct kvm_nested_guest *gp, *newgp; - if (l1_lpid >= KVM_MAX_NESTED_GUESTS || - l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) + if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4))) return NULL; spin_lock(&kvm->mmu_lock); - gp = kvm->arch.nested_guests[l1_lpid]; + gp = __find_nested(kvm, l1_lpid); if (gp) ++gp->refcnt; spin_unlock(&kvm->mmu_lock); @@ -808,17 +822,19 @@ struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid, newgp = kvmhv_alloc_nested(kvm, l1_lpid); if (!newgp) return NULL; + + if (!__prealloc_nested(kvm, l1_lpid)) { + kvmhv_release_nested(newgp); + return NULL; + } + spin_lock(&kvm->mmu_lock); - if (kvm->arch.nested_guests[l1_lpid]) { - /* someone else beat us to it */ - gp = kvm->arch.nested_guests[l1_lpid]; - } else { - kvm->arch.nested_guests[l1_lpid] = newgp; + gp = __find_nested(kvm, l1_lpid); + if (!gp) { + __add_nested(kvm, l1_lpid, newgp); ++newgp->refcnt; gp = newgp; newgp = NULL; - if (l1_lpid > kvm->arch.max_nested_lpid) - kvm->arch.max_nested_lpid = l1_lpid; } ++gp->refcnt; spin_unlock(&kvm->mmu_lock); @@ -841,20 +857,13 @@ void kvmhv_put_nested(struct kvm_nested_guest *gp) kvmhv_release_nested(gp); } -static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) -{ - if (lpid > kvm->arch.max_nested_lpid) - return NULL; - return kvm->arch.nested_guests[lpid]; -} - pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, unsigned long ea, unsigned *hshift) { struct kvm_nested_guest *gp; pte_t *pte; - gp = kvmhv_find_nested(kvm, lpid); + gp = __find_nested(kvm, lpid); if (!gp) return NULL; @@ -960,7 +969,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, gpa = n_rmap & RMAP_NESTED_GPA_MASK; lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; - gp = kvmhv_find_nested(kvm, lpid); + gp = __find_nested(kvm, lpid); if (!gp) return; @@ -1152,16 +1161,13 @@ static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric) { struct kvm *kvm = vcpu->kvm; struct kvm_nested_guest *gp; - int i; + int lpid; spin_lock(&kvm->mmu_lock); - for (i = 0; i <= kvm->arch.max_nested_lpid; i++) { - gp = kvm->arch.nested_guests[i]; - if (gp) { - spin_unlock(&kvm->mmu_lock); - kvmhv_emulate_tlbie_lpid(vcpu, gp, ric); - spin_lock(&kvm->mmu_lock); - } + idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) { + spin_unlock(&kvm->mmu_lock); + kvmhv_emulate_tlbie_lpid(vcpu, gp, ric); + spin_lock(&kvm->mmu_lock); } spin_unlock(&kvm->mmu_lock); } @@ -1313,7 +1319,7 @@ long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid, * H_ENTER_NESTED call. Since we can't differentiate this case from * the invalid case, we ignore such flush requests and return success. */ - if (!kvmhv_find_nested(vcpu->kvm, lpid)) + if (!__find_nested(vcpu->kvm, lpid)) return H_SUCCESS; /* @@ -1657,15 +1663,12 @@ long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid) { - int ret = -1; + int ret = lpid + 1; spin_lock(&kvm->mmu_lock); - while (++lpid <= kvm->arch.max_nested_lpid) { - if (kvm->arch.nested_guests[lpid]) { - ret = lpid; - break; - } - } + if (!idr_get_next(&kvm->arch.kvm_nested_guest_idr, &ret)) + ret = -1; spin_unlock(&kvm->mmu_lock); + return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index a28e5b3daabd..9dba3e3f65a0 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -539,8 +539,10 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 { struct kvm_nested_guest *nested = vcpu->arch.nested; u32 lpid; + u32 pid; lpid = nested ? nested->shadow_lpid : kvm->arch.lpid; + pid = vcpu->arch.pid; /* * Prior memory accesses to host PID Q3 must be completed before we @@ -551,7 +553,7 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 isync(); mtspr(SPRN_LPID, lpid); mtspr(SPRN_LPCR, lpcr); - mtspr(SPRN_PID, vcpu->arch.pid); + mtspr(SPRN_PID, pid); /* * isync not required here because we are HRFID'ing to guest before * any guest context access, which is context synchronising. @@ -561,9 +563,11 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6 static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr) { u32 lpid; + u32 pid; int i; lpid = kvm->arch.lpid; + pid = vcpu->arch.pid; /* * See switch_mmu_to_guest_radix. ptesync should not be required here @@ -574,7 +578,7 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 isync(); mtspr(SPRN_LPID, lpid); mtspr(SPRN_LPCR, lpcr); - mtspr(SPRN_PID, vcpu->arch.pid); + mtspr(SPRN_PID, pid); for (i = 0; i < vcpu->arch.slb_max; i++) mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv); @@ -585,6 +589,9 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 static void switch_mmu_to_host(struct kvm *kvm, u32 pid) { + u32 lpid = kvm->arch.host_lpid; + u64 lpcr = kvm->arch.host_lpcr; + /* * The guest has exited, so guest MMU context is no longer being * non-speculatively accessed, but a hwsync is needed before the @@ -594,8 +601,8 @@ static void switch_mmu_to_host(struct kvm *kvm, u32 pid) asm volatile("hwsync" ::: "memory"); isync(); mtspr(SPRN_PID, pid); - mtspr(SPRN_LPID, kvm->arch.host_lpid); - mtspr(SPRN_LPCR, kvm->arch.host_lpcr); + mtspr(SPRN_LPID, lpid); + mtspr(SPRN_LPCR, lpcr); /* * isync is not required after the switch, because mtmsrd with L=0 * is performed after this switch, which is context synchronising. diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 587c33fc4564..6e16bd751c84 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -883,7 +883,7 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, /* --- Non-real mode XICS-related built-in routines --- */ -/** +/* * Host Operations poked by RM KVM */ static void rm_host_ipi_action(int action, void *data) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index d185dee26026..0c552885a032 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -51,6 +51,14 @@ #define STACK_SLOT_FSCR (SFS-96) /* + * Use the last LPID (all implemented LPID bits = 1) for partition switching. + * This is reserved in the LPID allocator. POWER7 only implements 0x3ff, but + * we write 0xfff into the LPID SPR anyway, which seems to work and just + * ignores the top bits. + */ +#define LPID_RSVD 0xfff + +/* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. * diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 45c993dd05f5..36f2314c58e5 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -361,13 +361,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, struct kvm *kvm, unsigned long *gfn) { - struct kvmppc_uvmem_slot *p; + struct kvmppc_uvmem_slot *p = NULL, *iter; bool ret = false; unsigned long i; - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) - if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) + if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { + p = iter; break; + } if (!p) return ret; /* diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index c0ce5531d9bc..ee4be73649e5 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -179,12 +179,13 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu); -void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) +bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr; + bool ret = true; if (!esc_vaddr) - return; + return ret; /* we are using XIVE with single escalation */ @@ -197,7 +198,7 @@ void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) * we also don't want to set xive_esc_on to 1 here in * case we race with xive_esc_irq(). */ - vcpu->arch.ceded = 0; + ret = false; /* * The escalation interrupts are special as we don't EOI them. * There is no need to use the load-after-store ordering offset @@ -210,6 +211,8 @@ void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00); } mb(); + + return ret; } EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation); @@ -238,7 +241,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data) vcpu->arch.irq_pending = 1; smp_mb(); - if (vcpu->arch.ceded) + if (vcpu->arch.ceded || vcpu->arch.nested) kvmppc_fast_vcpu_kick(vcpu); /* Since we have the no-EOI flag, the interrupt is effectively diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index fa0d8dbbe484..2b9ad8984ea1 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -399,7 +399,6 @@ static int __init kvmppc_e500mc_init(void) * allocator. */ kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core); - kvmppc_claim_lpid(0); /* host */ r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); if (r) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 875c30c12db0..32561192cf12 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2496,41 +2496,37 @@ out: return r; } -static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)]; +static DEFINE_IDA(lpid_inuse); static unsigned long nr_lpids; long kvmppc_alloc_lpid(void) { - long lpid; + int lpid; - do { - lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS); - if (lpid >= nr_lpids) { + /* The host LPID must always be 0 (allocation starts at 1) */ + lpid = ida_alloc_range(&lpid_inuse, 1, nr_lpids - 1, GFP_KERNEL); + if (lpid < 0) { + if (lpid == -ENOMEM) + pr_err("%s: Out of memory\n", __func__); + else pr_err("%s: No LPIDs free\n", __func__); - return -ENOMEM; - } - } while (test_and_set_bit(lpid, lpid_inuse)); + return -ENOMEM; + } return lpid; } EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid); -void kvmppc_claim_lpid(long lpid) -{ - set_bit(lpid, lpid_inuse); -} -EXPORT_SYMBOL_GPL(kvmppc_claim_lpid); - void kvmppc_free_lpid(long lpid) { - clear_bit(lpid, lpid_inuse); + ida_free(&lpid_inuse, lpid); } EXPORT_SYMBOL_GPL(kvmppc_free_lpid); +/* nr_lpids_param includes the host LPID */ void kvmppc_init_lpid(unsigned long nr_lpids_param) { - nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param); - memset(lpid_inuse, 0, sizeof(lpid_inuse)); + nr_lpids = nr_lpids_param; } EXPORT_SYMBOL_GPL(kvmppc_init_lpid); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 83c0ee9fbf05..0f2608679067 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -372,6 +372,9 @@ void register_page_bootmem_memmap(unsigned long section_nr, #ifdef CONFIG_PPC_BOOK3S_64 unsigned int mmu_lpid_bits; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +EXPORT_SYMBOL_GPL(mmu_lpid_bits); +#endif unsigned int mmu_pid_bits; static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); |