aboutsummaryrefslogtreecommitdiff
path: root/arch/s390/kvm/kvm-s390.c
diff options
context:
space:
mode:
authorClaudio Imbrenda <[email protected]>2022-11-11 18:06:27 +0100
committerJanosch Frank <[email protected]>2022-11-23 09:06:50 +0000
commitfb491d5500a7ca551e49bc32d9b19d226023f68d (patch)
tree998a347ce950106f75b18f3d4e9fe6aa65ce76e8 /arch/s390/kvm/kvm-s390.c
parent58635d6615f1e5a870548ae8999870fdfcdecec0 (diff)
KVM: s390: pv: asynchronous destroy for reboot
Until now, destroying a protected guest was an entirely synchronous operation that could potentially take a very long time, depending on the size of the guest, due to the time needed to clean up the address space from protected pages. This patch implements an asynchronous destroy mechanism, that allows a protected guest to reboot significantly faster than previously. This is achieved by clearing the pages of the old guest in background. In case of reboot, the new guest will be able to run in the same address space almost immediately. The old protected guest is then only destroyed when all of its memory has been destroyed or otherwise made non protected. Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl: KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for later asynchronous teardown. The current KVM VM will then continue immediately as non-protected. If a protected VM had already been set aside for asynchronous teardown, but without starting the teardown process, this call will fail. There can be at most one VM set aside at any time. Once it is set aside, the protected VM only exists in the context of the Ultravisor, it is not associated with the KVM VM anymore. Its protected CPUs have already been destroyed, but not its memory. This command can be issued again immediately after starting KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion. KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace from a separate thread. If a fatal signal is received (or if the process terminates naturally), the command will terminate immediately without completing. All protected VMs whose teardown was interrupted will be put in the need_cleanup list. The rest of the normal KVM teardown process will take care of properly cleaning up all remaining protected VMs, including the ones on the need_cleanup list. Signed-off-by: Claudio Imbrenda <[email protected]> Reviewed-by: Nico Boehr <[email protected]> Reviewed-by: Janosch Frank <[email protected]> Reviewed-by: Steffen Eiden <[email protected]> Link: https://lore.kernel.org/r/[email protected] Message-Id: <[email protected]> Signed-off-by: Janosch Frank <[email protected]>
Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r--arch/s390/kvm/kvm-s390.c49
1 files changed, 41 insertions, 8 deletions
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bd6e0201bfe5..f0abaaf7eea4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
+static int async_destroy;
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
+ const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
+ void __user *argp = (void __user *)cmd->data;
int r = 0;
u16 dummy;
- void __user *argp = (void __user *)cmd->data;
+
+ if (need_lock)
+ mutex_lock(&kvm->lock);
switch (cmd->cmd) {
case KVM_PV_ENABLE: {
@@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
break;
}
+ case KVM_PV_ASYNC_CLEANUP_PREPARE:
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ case KVM_PV_ASYNC_CLEANUP_PERFORM:
+ r = -EINVAL;
+ if (!async_destroy)
+ break;
+ /* kvm->lock must not be held; this is asserted inside the function. */
+ r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
+ break;
case KVM_PV_DISABLE: {
r = -EINVAL;
if (!kvm_s390_pv_is_protected(kvm))
@@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
*/
if (r)
break;
- r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+ r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
@@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
default:
r = -ENOTTY;
}
+ if (need_lock)
+ mutex_unlock(&kvm->lock);
+
return r;
}
@@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EINVAL;
break;
}
- mutex_lock(&kvm->lock);
+ /* must be called without kvm->lock */
r = kvm_s390_handle_pv(kvm, &args);
- mutex_unlock(&kvm->lock);
if (copy_to_user(argp, &args, sizeof(args))) {
r = -EFAULT;
break;
@@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_vsie_init(kvm);
if (use_gisa)
kvm_s390_gisa_init(kvm);
+ INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+ kvm->arch.pv.set_aside = NULL;
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
/*
* We are already at the end of life and kvm->lock is not taken.
* This is ok as the file descriptor is closed by now and nobody
- * can mess with the pv state. To avoid lockdep_assert_held from
- * complaining we do not use kvm_s390_pv_is_protected.
+ * can mess with the pv state.
*/
- if (kvm_s390_pv_get_handle(kvm))
- kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
/*
* Remove the mmu notifier only when the whole KVM VM is torn down,
* and only if one was registered to begin with. If the VM is