-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 3
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 9
-rw-r--r--  arch/x86/include/asm/reboot.h | 2
-rw-r--r--  arch/x86/include/asm/virtext.h | 16
-rw-r--r--  arch/x86/include/asm/xen/hypervisor.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 5
-rw-r--r--  arch/x86/kernel/crash.c | 17
-rw-r--r--  arch/x86/kernel/reboot.c | 88
-rw-r--r--  arch/x86/kernel/smp.c | 6
-rw-r--r--  arch/x86/kvm/cpuid.c | 31
-rw-r--r--  arch/x86/kvm/emulate.c | 6
-rw-r--r--  arch/x86/kvm/hyperv.c | 55
-rw-r--r--  arch/x86/kvm/pmu.c | 3
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 9
-rw-r--r--  arch/x86/kvm/x86.c | 34
-rw-r--r--  arch/x86/kvm/xen.c | 26
-rw-r--r--  arch/x86/kvm/xen.h | 7
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 1
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/hyperv.h | 5
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c | 1
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_clock.c | 2
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c | 97
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_features.c | 9
23 files changed, 327 insertions(+), 109 deletions(-)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 61012476d66e..cdb7e1492311 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -312,6 +312,9 @@
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
+#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4d2bc08794e4..8b38a4cb2e29 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -678,6 +678,11 @@ struct kvm_vcpu_hv {
} nested;
};
+struct kvm_hypervisor_cpuid {
+ u32 base;
+ u32 limit;
+};
+
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
@@ -698,6 +703,7 @@ struct kvm_vcpu_xen {
struct hrtimer timer;
int poll_evtchn;
struct timer_list poll_timer;
+ struct kvm_hypervisor_cpuid cpuid;
};
struct kvm_queued_exception {
@@ -826,7 +832,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
- u32 kvm_cpuid_base;
+ struct kvm_hypervisor_cpuid kvm_cpuid;
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -1327,7 +1333,6 @@ struct kvm_arch {
u32 bsp_vcpu_id;
u64 disabled_quirks;
- int cpu_dirty_logging_count;
enum kvm_irqchip_mode irqchip_mode;
u8 nr_reserved_ioapic_pins;
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..bc5b4d788c08 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 8757078d4442..3b12e6b99412 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -126,7 +126,21 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ if (efer & EFER_SVME) {
+ /*
+ * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
+ * aren't blocked, e.g. if a fatal error occurred between CLGI
+ * and STGI. Note, STGI may #UD if SVM is disabled from NMI
+ * context between reading EFER and executing STGI. In that
+ * case, GIF must already be set, otherwise the NMI would have
+ * been blocked, so just eat the fault.
+ */
+ asm_volatile_goto("1: stgi\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "memory" : fault);
+fault:
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ }
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 16f548a661cf..5fc35f889cd1 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -38,9 +38,11 @@ extern struct start_info *xen_start_info;
#include <asm/processor.h>
+#define XEN_SIGNATURE "XenVMMXenVMM"
+
static inline uint32_t xen_cpuid_base(void)
{
- return hypervisor_cpuid_base("XenVMMXenVMM", 2);
+ return hypervisor_cpuid_base(XEN_SIGNATURE, 2);
}
struct pci_dev;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e48deab8901d..bde47f3a8c9d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/stddef.h>
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
@@ -507,8 +508,8 @@ struct kvm_nested_state {
* KVM_{GET,PUT}_NESTED_STATE ioctl values.
*/
union {
- struct kvm_vmx_nested_state_data vmx[0];
- struct kvm_svm_nested_state_data svm[0];
+ __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx);
+ __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm);
} data;
};
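
For background on the uapi change above: a bare flexible array member is not allowed inside a union (nor as the only member of a struct), which is why the header now pulls in <linux/stddef.h> and wraps each member with __DECLARE_FLEX_ARRAY(). A rough sketch of the macro's expansion (paraphrasing include/uapi/linux/stddef.h, not a verbatim copy):

        /* The empty struct gives the union a complete named member, so the
         * trailing flexible array NAME[] becomes legal while keeping the
         * same zero-size layout as the old NAME[0] hack. */
        #define __DECLARE_FLEX_ARRAY(TYPE, NAME)        \
                struct {                                 \
                        struct { } __empty_ ## NAME;     \
                        TYPE NAME[];                     \
                }
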
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..d03c551defcc 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,33 +528,29 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
-static void emergency_vmx_disable_all(void)
+static void emergency_reboot_disable_virtualization(void)
{
/* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
- * Disable VMX on all CPUs before rebooting, otherwise we risk hanging
- * the machine, because the CPU blocks INIT when it's in VMX root.
+ * Disable virtualization on all CPUs before rebooting to avoid hanging
+ * the system, as VMX and SVM block INIT when running in the host.
*
* We can't take any locks and we may be on an inconsistent state, so
- * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt.
+ * use NMIs as IPIs to tell the other CPUs to disable VMX/SVM and halt.
*
- * Do the NMI shootdown even if VMX if off on _this_ CPU, as that
- * doesn't prevent a different CPU from being in VMX root operation.
+ * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
+ * other CPUs may have virtualization enabled.
*/
- if (cpu_has_vmx()) {
- /* Safely force _this_ CPU out of VMX root operation. */
- __cpu_emergency_vmxoff();
+ if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+ /* Safely force _this_ CPU out of VMX/SVM operation. */
+ cpu_emergency_disable_virtualization();
- /* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ /* Disable VMX/SVM and halt on other CPUs. */
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -590,7 +586,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
- emergency_vmx_disable_all();
+ emergency_reboot_disable_virtualization();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@@ -795,6 +791,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +824,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +842,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +895,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +935,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 06db901fabe8..375b33ecafa2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -32,7 +32,7 @@
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
-#include <asm/virtext.h>
+#include <asm/reboot.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
@@ -122,7 +122,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -134,7 +134,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
ack_APIC_irq();
- cpu_emergency_vmxoff();
+ cpu_emergency_disable_virtualization();
stop_this_cpu(NULL);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2a9f1e200dbc..8f8edeaf8177 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -26,6 +26,7 @@
#include "mmu.h"
#include "trace.h"
#include "pmu.h"
+#include "xen.h"
/*
* Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
@@ -181,15 +182,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
return 0;
}
-static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+ const char *sig)
{
- u32 function;
+ struct kvm_hypervisor_cpuid cpuid = {};
struct kvm_cpuid_entry2 *entry;
+ u32 base;
- vcpu->arch.kvm_cpuid_base = 0;
-
- for_each_possible_hypervisor_cpuid_base(function) {
- entry = kvm_find_cpuid_entry(vcpu, function);
+ for_each_possible_hypervisor_cpuid_base(base) {
+ entry = kvm_find_cpuid_entry(vcpu, base);
if (entry) {
u32 signature[3];
@@ -198,19 +199,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
signature[1] = entry->ecx;
signature[2] = entry->edx;
- BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
- if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
- vcpu->arch.kvm_cpuid_base = function;
+ if (!memcmp(signature, sig, sizeof(signature))) {
+ cpuid.base = base;
+ cpuid.limit = entry->eax;
break;
}
}
}
+
+ return cpuid;
}
static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 *entries, int nent)
{
- u32 base = vcpu->arch.kvm_cpuid_base;
+ u32 base = vcpu->arch.kvm_cpuid.base;
if (!base)
return NULL;
@@ -440,7 +443,8 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
vcpu->arch.cpuid_entries = e2;
vcpu->arch.cpuid_nent = nent;
- kvm_update_kvm_cpuid_base(vcpu);
+ vcpu->arch.kvm_cpuid = kvm_get_hypervisor_cpuid(vcpu, KVM_SIGNATURE);
+ vcpu->arch.xen.cpuid = kvm_get_hypervisor_cpuid(vcpu, XEN_SIGNATURE);
kvm_vcpu_after_set_cpuid(vcpu);
return 0;
@@ -664,8 +668,9 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
kvm_cpu_cap_mask(CPUID_7_1_EAX,
- F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) |
- F(AVX_IFMA)
+ F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) |
+ F(FZRM) | F(FSRS) | F(FSRC) |
+ F(AMX_FP16) | F(AVX_IFMA)
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
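
The new kvm_get_hypervisor_cpuid() walks the vCPU's CPUID entries over the same 0x40000000-0x4000ff00 range a guest would probe for a hypervisor signature, so a single helper can now cache both the KVM and the Xen base/limit. For comparison, a guest-side probe of that range looks roughly like this (illustrative sketch, essentially what hypervisor_cpuid_base() in asm/processor.h does):

        /* Illustrative sketch: probe each possible hypervisor base leaf and
         * match the 12-byte signature returned in EBX/ECX/EDX; EAX of the
         * base leaf is the highest leaf the hypervisor implements (the
         * "limit" cached above). */
        static u32 find_hypervisor_base(const char *sig)
        {
                u32 base, eax, signature[3];

                for (base = 0x40000000; base < 0x40010000; base += 0x100) {
                        cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
                        if (!memcmp(sig, signature, sizeof(signature)))
                                return base;
                }
                return 0;
        }
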
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c3443045cd93..a630c5db971c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1634,7 +1634,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
case VCPU_SREG_SS:
/*
* segment is not a writable data segment or segment
- * selector's RPL != CPL or segment selector's RPL != CPL
+ * selector's RPL != CPL or DPL != CPL
*/
if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
goto exception;
@@ -1696,11 +1696,11 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
/*
* segment is not a data or readable code segment or
* ((segment is a data or nonconforming code segment)
- * and (both RPL and CPL > DPL))
+ * and ((RPL > DPL) or (CPL > DPL)))
*/
if ((seg_desc.type & 0xa) == 0x8 ||
(((seg_desc.type & 0xc) != 0xc) &&
- (rpl > dpl && cpl > dpl)))
+ (rpl > dpl || cpl > dpl)))
goto exception;
break;
}
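
The emulate.c change switches the DS/ES/FS/GS privilege check from '&&' to '||', matching the SDM rule that a data or non-conforming code segment may only be loaded when both RPL and CPL are at or below DPL. A concrete case the old check let through (illustrative values):

        /*
         * Worked example for the check above (illustrative values):
         *      rpl = 3, cpl = 0, dpl = 0, writable data segment
         * old: (rpl > dpl && cpl > dpl)  ->  (true && false)  ->  no fault  (wrong)
         * new: (rpl > dpl || cpl > dpl)  ->  (true || false)  ->  #GP       (correct)
         */
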
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 71aff0edc0ed..f610b07ddcf4 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -44,6 +44,24 @@
#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK)
+/*
+ * As per Hyper-V TLFS, extended hypercalls start from 0x8001
+ * (HvExtCallQueryCapabilities). The response to this hypercall is a 64-bit value
+ * where each bit tells which extended hypercall is available besides
+ * HvExtCallQueryCapabilities.
+ *
+ * 0x8001 - First extended hypercall, HvExtCallQueryCapabilities, no bit
+ * assigned.
+ *
+ * 0x8002 - Bit 0
+ * 0x8003 - Bit 1
+ * ..
+ * 0x8041 - Bit 63
+ *
+ * Therefore, HV_EXT_CALL_MAX = 0x8001 + 64
+ */
+#define HV_EXT_CALL_MAX (HV_EXT_CALL_QUERY_CAPABILITIES + 64)
+
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
bool vcpu_kick);
@@ -2439,6 +2457,9 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
case HVCALL_SEND_IPI:
return hv_vcpu->cpuid_cache.enlightenments_eax &
HV_X64_CLUSTER_IPI_RECOMMENDED;
+ case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
+ return hv_vcpu->cpuid_cache.features_ebx &
+ HV_ENABLE_EXTENDED_HYPERCALLS;
default:
break;
}
@@ -2531,14 +2552,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
- vcpu->run->exit_reason = KVM_EXIT_HYPERV;
- vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
- vcpu->run->hyperv.u.hcall.input = hc.param;
- vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
- vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
- vcpu->arch.complete_userspace_io =
- kvm_hv_hypercall_complete_userspace;
- return 0;
+ goto hypercall_userspace_exit;
case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
if (unlikely(hc.var_cnt)) {
ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -2597,15 +2611,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
ret = HV_STATUS_OPERATION_DENIED;
break;
}
- vcpu->run->exit_reason = KVM_EXIT_HYPERV;
- vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
- vcpu->run->hyperv.u.hcall.input = hc.param;
- vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
- vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
- vcpu->arch.complete_userspace_io =
- kvm_hv_hypercall_complete_userspace;
- return 0;
+ goto hypercall_userspace_exit;
}
+ case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
+ if (unlikely(hc.fast)) {
+ ret = HV_STATUS_INVALID_PARAMETER;
+ break;
+ }
+ goto hypercall_userspace_exit;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
@@ -2613,6 +2626,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
hypercall_complete:
return kvm_hv_hypercall_complete(vcpu, ret);
+
+hypercall_userspace_exit:
+ vcpu->run->exit_reason = KVM_EXIT_HYPERV;
+ vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
+ vcpu->run->hyperv.u.hcall.input = hc.param;
+ vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
+ vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
+ vcpu->arch.complete_userspace_io = kvm_hv_hypercall_complete_userspace;
+ return 0;
}
void kvm_hv_init_vm(struct kvm *kvm)
@@ -2756,6 +2778,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->ebx |= HV_POST_MESSAGES;
ent->ebx |= HV_SIGNAL_EVENTS;
+ ent->ebx |= HV_ENABLE_EXTENDED_HYPERCALLS;
ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
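
The new access check and HV_EXT_CALL_MAX both rest on the bit layout spelled out in the comment at the top of the hyperv.c hunk: 0x8001 needs no capability bit, 0x8002 maps to bit 0, and 0x8041 maps to bit 63. A small helper along those lines (hypothetical, not part of the patch) makes the mapping explicit:

        /* Hypothetical helper: test whether extended hypercall 'code' is
         * advertised in the 64-bit capability mask returned by
         * HvExtCallQueryCapabilities. */
        static bool hv_ext_call_advertised(u64 caps, u16 code)
        {
                if (code < HV_EXT_CALL_QUERY_CAPABILITIES || code > HV_EXT_CALL_MAX)
                        return false;
                if (code == HV_EXT_CALL_QUERY_CAPABILITIES)
                        return true;    /* 0x8001 itself has no bit assigned */
                return caps & BIT_ULL(code - HV_EXT_CALL_QUERY_CAPABILITIES - 1);
        }
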
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d939d3b84e6f..58e5a456273a 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -634,6 +634,7 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
mutex_lock(&kvm->lock);
filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
mutex_is_locked(&kvm->lock));
+ mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
@@ -644,8 +645,6 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);
- mutex_unlock(&kvm->lock);
-
r = 0;
cleanup:
kfree(filter);
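
The pmu.c hunk (and the analogous msr_filter hunk in x86.c below) reorders the teardown so kvm->lock is dropped before the SRCU synchronization: only the pointer swap needs the mutex, while waiting for readers and freeing the old object can happen afterwards. In outline (illustrative, using the names from this hunk):

        /* Swap the RCU-managed pointer under kvm->lock ... */
        mutex_lock(&kvm->lock);
        old = rcu_replace_pointer(kvm->arch.pmu_event_filter, new,
                                  mutex_is_locked(&kvm->lock));
        mutex_unlock(&kvm->lock);

        /* ... then wait out SRCU readers and free the old filter unlocked. */
        synchronize_srcu_expedited(&kvm->srcu);
        kfree(old);
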
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c788aa382611..bbf60bda877e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4606,7 +4606,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
* it needs to be set here when dirty logging is already active, e.g.
* if this vCPU was created after dirty logging was enabled.
*/
- if (!vcpu->kvm->arch.cpu_dirty_logging_count)
+ if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
if (cpu_has_vmx_xsaves()) {
@@ -7988,17 +7988,20 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (WARN_ON_ONCE(!enable_pml))
+ return;
+
if (is_guest_mode(vcpu)) {
vmx->nested.update_vmcs01_cpu_dirty_logging = true;
return;
}
/*
- * Note, cpu_dirty_logging_count can be changed concurrent with this
+ * Note, nr_memslots_dirty_logging can be changed concurrent with this
* code, but in that case another update request will be made and so
* the guest will never run with a stale PML value.
*/
- if (vcpu->kvm->arch.cpu_dirty_logging_count)
+ if (atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
else
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 508074e47bc0..810cbe3b3ba6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3161,6 +3161,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
&vcpu->hv_clock.tsc_shift,
&vcpu->hv_clock.tsc_to_system_mul);
vcpu->hw_tsc_khz = tgt_tsc_khz;
+ kvm_xen_update_tsc_info(v);
}
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
@@ -4290,8 +4291,8 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
{
struct kvm_msrs msrs;
struct kvm_msr_entry *entries;
- int r, n;
unsigned size;
+ int r;
r = -EFAULT;
if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
@@ -4308,17 +4309,11 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
goto out;
}
- r = n = __msr_io(vcpu, &msrs, entries, do_msr);
- if (r < 0)
- goto out_free;
+ r = __msr_io(vcpu, &msrs, entries, do_msr);
- r = -EFAULT;
if (writeback && copy_to_user(user_msrs->entries, entries, size))
- goto out_free;
-
- r = n;
+ r = -EFAULT;
-out_free:
kfree(entries);
out:
return r;
@@ -6465,7 +6460,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
struct kvm_x86_msr_filter *new_filter, *old_filter;
bool default_allow;
bool empty = true;
- int r = 0;
+ int r;
u32 i;
if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK)
@@ -6491,17 +6486,14 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
}
mutex_lock(&kvm->lock);
-
- /* The per-VM filter is protected by kvm->lock... */
- old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
-
- rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+ old_filter = rcu_replace_pointer(kvm->arch.msr_filter, new_filter,
+ mutex_is_locked(&kvm->lock));
+ mutex_unlock(&kvm->lock);
synchronize_srcu(&kvm->srcu);
kvm_free_msr_filter(old_filter);
kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
- mutex_unlock(&kvm->lock);
return 0;
}
@@ -12273,7 +12265,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
*/
hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, 0);
- if (IS_ERR((void *)hva))
+ if (IS_ERR_VALUE(hva))
return (void __user *)hva;
} else {
if (!slot || !slot->npages)
@@ -12488,16 +12480,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
{
- struct kvm_arch *ka = &kvm->arch;
+ int nr_slots;
if (!kvm_x86_ops.cpu_dirty_log_size)
return;
- if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
- (!enable && --ka->cpu_dirty_logging_count == 0))
+ nr_slots = atomic_read(&kvm->nr_memslots_dirty_logging);
+ if ((enable && nr_slots == 1) || !nr_slots)
kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
-
- WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
}
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 2681e2007e39..40edf4d1974c 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -23,6 +23,9 @@
#include <xen/interface/event_channel.h>
#include <xen/interface/sched.h>
+#include <asm/xen/cpuid.h>
+
+#include "cpuid.h"
#include "trace.h"
static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm);
@@ -2077,6 +2080,29 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
del_timer_sync(&vcpu->arch.xen.poll_timer);
}
+void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+ u32 function;
+
+ if (!vcpu->arch.xen.cpuid.base)
+ return;
+
+ function = vcpu->arch.xen.cpuid.base | XEN_CPUID_LEAF(3);
+ if (function > vcpu->arch.xen.cpuid.limit)
+ return;
+
+ entry = kvm_find_cpuid_entry_index(vcpu, function, 1);
+ if (entry) {
+ entry->ecx = vcpu->arch.hv_clock.tsc_to_system_mul;
+ entry->edx = vcpu->arch.hv_clock.tsc_shift;
+ }
+
+ entry = kvm_find_cpuid_entry_index(vcpu, function, 2);
+ if (entry)
+ entry->eax = vcpu->arch.hw_tsc_khz;
+}
+
void kvm_xen_init_vm(struct kvm *kvm)
{
mutex_init(&kvm->arch.xen.xen_lock);
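
kvm_xen_update_tsc_info() is called from kvm_guest_time_update() (see the x86.c hunk above), so the Xen time leaf tracks the vCPU's current TSC parameters. From the guest side the data lands as follows (illustrative sketch; the sub-leaf layout is the one the new function populates):

        /* Guest-side sketch: read back what kvm_xen_update_tsc_info() wrote.
         * Returns the virtual TSC frequency in kHz, or 0 if Xen is absent. */
        static u32 xen_guest_read_tsc_leaf(u32 *mul, s8 *shift)
        {
                u32 base = xen_cpuid_base();    /* 0 if the Xen signature is absent */
                u32 eax, ebx, ecx, edx;

                if (!base)
                        return 0;

                /* Sub-leaf 1 of leaf base+3: tsc->ns multiplier (ECX), shift (EDX) */
                cpuid_count(base + 3, 1, &eax, &ebx, &ecx, &edx);
                *mul = ecx;
                *shift = (s8)edx;

                /* Sub-leaf 2 of leaf base+3: virtual TSC frequency in kHz (EAX) */
                cpuid_count(base + 3, 2, &eax, &ebx, &ecx, &edx);
                return eax;
        }
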
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index ea33d80a0c51..f8f1fe22d090 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,8 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
+#include <asm/xen/hypervisor.h>
+
#ifdef CONFIG_KVM_XEN
#include <linux/jump_label_ratelimit.h>
@@ -32,6 +34,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe,
int kvm_xen_setup_evtchn(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
+void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
@@ -135,6 +138,10 @@ static inline bool kvm_xen_timer_enabled(struct kvm_vcpu *vcpu)
{
return false;
}
+
+static inline void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu)
+{
+}
#endif
int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 1750f91dd936..844417601618 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index 28eb99046475..fa65b908b13e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -137,6 +137,8 @@
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
#define HV_CPU_MANAGEMENT \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
#define HV_ISOLATION \
KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
@@ -213,6 +215,9 @@
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
+
#define HV_FLUSH_ALL_PROCESSORS BIT(0)
#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 56d5ea949cbb..f25b3e9b5a07 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1844,6 +1844,7 @@ static struct exit_reason {
{KVM_EXIT_X86_RDMSR, "RDMSR"},
{KVM_EXIT_X86_WRMSR, "WRMSR"},
{KVM_EXIT_XEN, "XEN"},
+ {KVM_EXIT_HYPERV, "HYPERV"},
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
#endif
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index d576bc8ce823..2ee0af0d449e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -104,7 +104,7 @@ static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_
/* Set Guest OS id to enable Hyper-V emulation */
GUEST_SYNC(1);
- wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
GUEST_SYNC(2);
check_tsc_msr_rdtsc();
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
new file mode 100644
index 000000000000..73af44d2167f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <[email protected]>
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+ vm_vaddr_t out_pg_gva)
+{
+ uint64_t *output_gva;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+ output_gva = (uint64_t *)out_pg_gva;
+
+ hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+ /* TLFS states output will be a uint64_t value */
+ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ vm_vaddr_t hcall_out_page;
+ vm_vaddr_t hcall_in_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t *outval;
+ struct ucall uc;
+
+ /* Verify if extended hypercalls are supported */
+ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+ HV_ENABLE_EXTENDED_HYPERCALLS)) {
+ print_skip("Extended calls not supported by the kernel");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+ vcpu_set_hv_cpuid(vcpu);
+
+ /* Hypercall input */
+ hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+ /* Hypercall output */
+ hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+ vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+ addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+ *outval = EXT_CAPABILITIES;
+ run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 258267df8e2a..c5e3b39edd07 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -649,6 +649,15 @@ static void guest_test_hcalls_access(void)
hcall->expect = HV_STATUS_SUCCESS;
break;
case 19:
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 20:
+ vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+ hcall->expect = HV_STATUS_INVALID_PARAMETER;
+ break;
+ case 21:
kvm_vm_free(vm);
return;
}