aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
authorJordan Niethe <[email protected]>2023-09-14 13:05:59 +1000
committerMichael Ellerman <[email protected]>2023-09-14 22:04:24 +1000
commit19d31c5f115754c369c0995df47479c384757f82 (patch)
tree21369b34560ed6e35fe8d902bf0b7dc793914e52 /arch/powerpc/kvm/book3s_hv.c
parentdfcaacc8f970c6b4ea4e32d2186f2bea4a1d5255 (diff)
KVM: PPC: Add support for nestedv2 guests
A series of hcalls have been added to the PAPR which allow a regular guest partition to create and manage guest partitions of its own. KVM already had an interface that allowed this on powernv platforms. This existing interface will now be called "nestedv1". The newly added PAPR interface will be called "nestedv2". PHYP will support the nestedv2 interface. At this time the host side of the nestedv2 interface has not been implemented on powernv but there is no technical reason why it could not be added. The nestedv1 interface is still supported. Add support to KVM to utilize these hcalls to enable running nested guests as a pseries guest on PHYP. Overview of the new hcall usage: - L1 and L0 negotiate capabilities with H_GUEST_{G,S}ET_CAPABILITIES() - L1 requests the L0 create a L2 with H_GUEST_CREATE() and receives a handle to use in future hcalls - L1 requests the L0 create a L2 vCPU with H_GUEST_CREATE_VCPU() - L1 sets up the L2 using H_GUEST_SET and the H_GUEST_VCPU_RUN input buffer - L1 requests the L0 runs the L2 vCPU using H_GUEST_VCPU_RUN() - L2 returns to L1 with an exit reason and L1 reads the H_GUEST_VCPU_RUN output buffer populated by the L0 - L1 handles the exit using H_GET_STATE if necessary - L1 reruns L2 vCPU with H_GUEST_VCPU_RUN - L1 frees the L2 in the L0 with H_GUEST_DELETE() Support for the new API is determined by trying H_GUEST_GET_CAPABILITIES. On a successful return, use the nestedv2 interface. Use the vcpu register state setters for tracking modified guest state elements and copy the thread wide values into the H_GUEST_VCPU_RUN input buffer immediately before running a L2. The guest wide elements can not be added to the input buffer so send them with a separate H_GUEST_SET call if necessary. Make the vcpu register getter load the corresponding value from the real host with H_GUEST_GET. To avoid unnecessarily calling H_GUEST_GET, track which values have already been loaded between H_GUEST_VCPU_RUN calls. If an element is present in the H_GUEST_VCPU_RUN output buffer it also does not need to be loaded again. Tested-by: Sachin Sant <[email protected]> Signed-off-by: Vaibhav Jain <[email protected]> Signed-off-by: Gautam Menghani <[email protected]> Signed-off-by: Kautuk Consul <[email protected]> Signed-off-by: Amit Machhiwal <[email protected]> Signed-off-by: Jordan Niethe <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://msgid.link/[email protected]
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c134
1 files changed, 124 insertions, 10 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5743f32bf45e..1ed6ec140701 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -393,7 +393,7 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
- unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
+ unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* We can (emulate) our own architecture version and anything older */
@@ -424,9 +424,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
break;
case PVR_ARCH_300:
guest_pcr_bit = PCR_ARCH_300;
+ cap = H_GUEST_CAP_POWER9;
break;
case PVR_ARCH_31:
guest_pcr_bit = PCR_ARCH_31;
+ cap = H_GUEST_CAP_POWER10;
break;
default:
return -EINVAL;
@@ -437,8 +439,14 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
if (guest_pcr_bit > host_pcr_bit)
return -EINVAL;
+ if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) {
+ if (!(cap & nested_capabilities))
+ return -EINVAL;
+ }
+
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LOGICAL_PVR);
/*
* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
* Also set all reserved PCR bits
@@ -2187,6 +2195,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
}
vc->lpcr = new_lpcr;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
spin_unlock(&vc->lock);
}
@@ -2927,8 +2936,14 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
vcpu->arch.shared_big_endian = false;
#endif
#endif
- kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
+ if (kvmhv_is_nestedv2()) {
+ err = kvmhv_nestedv2_vcpu_create(vcpu, &vcpu->arch.nestedv2_io);
+ if (err < 0)
+ return err;
+ }
+
+ kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT);
kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE);
@@ -3084,6 +3099,8 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_vcpu_free(vcpu, &vcpu->arch.nestedv2_io);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -4048,6 +4065,55 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
}
}
+static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr, u64 *tb)
+{
+ struct kvmhv_nestedv2_io *io;
+ unsigned long msr, i;
+ int trap;
+ long rc;
+
+ io = &vcpu->arch.nestedv2_io;
+
+ msr = mfmsr();
+ kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending())
+ return 0;
+
+ rc = kvmhv_nestedv2_flush_vcpu(vcpu, time_limit);
+ if (rc < 0)
+ return -EINVAL;
+
+ accumulate_time(vcpu, &vcpu->arch.in_guest);
+ rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
+ &trap, &i);
+
+ if (rc != H_SUCCESS) {
+ pr_err("KVM Guest Run VCPU hcall failed\n");
+ if (rc == H_INVALID_ELEMENT_ID)
+ pr_err("KVM: Guest Run VCPU invalid element id at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_SIZE)
+ pr_err("KVM: Guest Run VCPU invalid element size at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_VALUE)
+ pr_err("KVM: Guest Run VCPU invalid element value at %ld\n", i);
+ return -EINVAL;
+ }
+ accumulate_time(vcpu, &vcpu->arch.guest_exit);
+
+ *tb = mftb();
+ kvmppc_gsm_reset(io->vcpu_message);
+ kvmppc_gsm_reset(io->vcore_message);
+ kvmppc_gsbm_zero(&io->valids);
+
+ rc = kvmhv_nestedv2_parse_output(vcpu);
+ if (rc < 0)
+ return -EINVAL;
+
+ timer_rearm_host_dec(*tb);
+
+ return trap;
+}
+
/* call our hypervisor to load up HV regs and go */
static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
{
@@ -4165,7 +4231,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu_vpa_increment_dispatch(vcpu);
if (kvmhv_on_pseries()) {
- trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+ if (kvmhv_is_nestedv1())
+ trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+ else
+ trap = kvmhv_vcpu_entry_nestedv2(vcpu, time_limit, lpcr, tb);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
@@ -5145,6 +5214,14 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
if (++cores_done >= kvm->arch.online_vcores)
break;
}
+
+ if (kvmhv_is_nestedv2()) {
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
+ }
+ }
}
void kvmppc_setup_partition_table(struct kvm *kvm)
@@ -5411,15 +5488,43 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/* Allocate the guest's logical partition ID */
- lpid = kvmppc_alloc_lpid();
- if ((long)lpid < 0)
- return -ENOMEM;
- kvm->arch.lpid = lpid;
+ if (!kvmhv_is_nestedv2()) {
+ lpid = kvmppc_alloc_lpid();
+ if ((long)lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
+ }
kvmppc_alloc_host_rm_ops();
kvmhv_vm_nested_init(kvm);
+ if (kvmhv_is_nestedv2()) {
+ long rc;
+ unsigned long guest_id;
+
+ rc = plpar_guest_create(0, &guest_id);
+
+ if (rc != H_SUCCESS)
+ pr_err("KVM: Create Guest hcall failed, rc=%ld\n", rc);
+
+ switch (rc) {
+ case H_PARAMETER:
+ case H_FUNCTION:
+ case H_STATE:
+ return -EINVAL;
+ case H_NOT_ENOUGH_RESOURCES:
+ case H_ABORTED:
+ return -ENOMEM;
+ case H_AUTHORITY:
+ return -EPERM;
+ case H_NOT_AVAILABLE:
+ return -EBUSY;
+ }
+ kvm->arch.lpid = guest_id;
+ }
+
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
@@ -5489,7 +5594,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
lpcr |= LPCR_HAIL;
ret = kvmppc_init_vm_radix(kvm);
if (ret) {
- kvmppc_free_lpid(kvm->arch.lpid);
+ if (kvmhv_is_nestedv2())
+ plpar_guest_delete(0, kvm->arch.lpid);
+ else
+ kvmppc_free_lpid(kvm->arch.lpid);
return ret;
}
kvmppc_setup_partition_table(kvm);
@@ -5579,10 +5687,14 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvm->arch.process_table = 0;
if (kvm->arch.secure_guest)
uv_svm_terminate(kvm->arch.lpid);
- kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
+ if (!kvmhv_is_nestedv2())
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
- kvmppc_free_lpid(kvm->arch.lpid);
+ if (kvmhv_is_nestedv2())
+ plpar_guest_delete(0, kvm->arch.lpid);
+ else
+ kvmppc_free_lpid(kvm->arch.lpid);
kvmppc_free_pimap(kvm);
}
@@ -5994,6 +6106,8 @@ static int kvmhv_enable_nested(struct kvm *kvm)
return -ENODEV;
if (!radix_enabled())
return -ENODEV;
+ if (kvmhv_is_nestedv2())
+ return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
if (kvm)