aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
author Paolo Bonzini <pbonzini@redhat.com> 2024-07-16 09:53:05 -0400
committer Paolo Bonzini <pbonzini@redhat.com> 2024-07-16 09:53:05 -0400
commit 5dcc1e76144fcf7bfe182bd98572d1957a380bac (patch)
tree 5fa11b2645b9babb1e1909986fc28cd2062060ca /arch/x86
parent 86014c1e20fa1b5d5c6968f37fdd2f1b6c94d519 (diff)
parent 82222ee7e84cb03158935e053c4c4960ac1debbd (diff)
Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.11

 - Add a global struct to consolidate tracking of host values, e.g. EFER, and
   move "shadow_phys_bits" into the structure as "maxphyaddr".

 - Add KVM_CAP_X86_APIC_BUS_CYCLES_NS to allow configuring the effective APIC
   bus frequency, because TDX.

 - Print the name of the APICv/AVIC inhibits in the relevant tracepoint.

 - Clean up KVM's handling of vendor specific emulation to consistently act on
   "compatible with Intel/AMD", versus checking for a specific vendor.

 - Misc cleanups
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/kvm_host.h | 24
-rw-r--r-- arch/x86/include/uapi/asm/kvm.h | 1
-rw-r--r-- arch/x86/kvm/cpuid.c | 12
-rw-r--r-- arch/x86/kvm/cpuid.h | 18
-rw-r--r-- arch/x86/kvm/emulate.c | 71
-rw-r--r-- arch/x86/kvm/hyperv.c | 3
-rw-r--r-- arch/x86/kvm/kvm_emulate.h | 1
-rw-r--r-- arch/x86/kvm/lapic.c | 6
-rw-r--r-- arch/x86/kvm/lapic.h | 3
-rw-r--r-- arch/x86/kvm/mmu.h | 27
-rw-r--r-- arch/x86/kvm/mmu/mmu.c | 2
-rw-r--r-- arch/x86/kvm/mmu/spte.c | 26
-rw-r--r-- arch/x86/kvm/pmu.c | 2
-rw-r--r-- arch/x86/kvm/svm/sev.c | 4
-rw-r--r-- arch/x86/kvm/svm/svm.c | 15
-rw-r--r-- arch/x86/kvm/svm/svm.h | 2
-rw-r--r-- arch/x86/kvm/trace.h | 9
-rw-r--r-- arch/x86/kvm/vmx/main.c | 2
-rw-r--r-- arch/x86/kvm/vmx/nested.c | 8
-rw-r--r-- arch/x86/kvm/vmx/vmx.c | 29
-rw-r--r-- arch/x86/kvm/vmx/vmx.h | 2
-rw-r--r-- arch/x86/kvm/vmx/x86_ops.h | 2
-rw-r--r-- arch/x86/kvm/x86.c | 112
-rw-r--r-- arch/x86/kvm/x86.h | 19
24 files changed, 212 insertions, 188 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 058a5c67979d..d5101f52e76c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1208,7 +1208,7 @@ enum kvm_apicv_inhibit {
* APIC acceleration is disabled by a module parameter
* and/or not supported in hardware.
*/
- APICV_INHIBIT_REASON_DISABLE,
+ APICV_INHIBIT_REASON_DISABLED,
/*
* APIC acceleration is inhibited because AutoEOI feature is
@@ -1278,8 +1278,27 @@ enum kvm_apicv_inhibit {
* mapping between logical ID and vCPU.
*/
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+
+ NR_APICV_INHIBIT_REASONS,
};
+#define __APICV_INHIBIT_REASON(reason) \
+ { BIT(APICV_INHIBIT_REASON_##reason), #reason }
+
+#define APICV_INHIBIT_REASONS \
+ __APICV_INHIBIT_REASON(DISABLED), \
+ __APICV_INHIBIT_REASON(HYPERV), \
+ __APICV_INHIBIT_REASON(ABSENT), \
+ __APICV_INHIBIT_REASON(BLOCKIRQ), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
+ __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
+ __APICV_INHIBIT_REASON(NESTED), \
+ __APICV_INHIBIT_REASON(IRQWIN), \
+ __APICV_INHIBIT_REASON(PIT_REINJ), \
+ __APICV_INHIBIT_REASON(SEV), \
+ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+
struct kvm_arch {
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
@@ -1365,6 +1384,7 @@ struct kvm_arch {
u32 default_tsc_khz;
bool user_set_tsc;
+ u64 apic_bus_cycle_ns;
seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
@@ -1709,7 +1729,6 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
@@ -1855,7 +1874,6 @@ struct kvm_arch_async_pf {
};
extern u32 __read_mostly kvm_nr_uret_msrs;
-extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
extern struct kvm_x86_ops kvm_x86_ops;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 988b5204d636..bf57a824f722 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {
#define KVM_RUN_X86_SMM (1 << 0)
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f2f2be5d1141..8cf4ca2ae79d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -335,6 +335,18 @@ static bool kvm_cpuid_has_hyperv(struct kvm_cpuid_entry2 *entries, int nent)
#endif
}
+static bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0);
+ if (!entry)
+ return false;
+
+ return is_guest_vendor_amd(entry->ebx, entry->ecx, entry->edx) ||
+ is_guest_vendor_hygon(entry->ebx, entry->ecx, entry->edx);
+}
+
static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 23dbb9eb277c..41697cca354e 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -102,24 +102,6 @@ static __always_inline void guest_cpuid_clear(struct kvm_vcpu *vcpu,
*reg &= ~__feature_bit(x86_feature);
}
-static inline bool guest_cpuid_is_amd_or_hygon(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0);
- return best &&
- (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
- is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
-}
-
-static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0);
- return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
-}
-
static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
return vcpu->arch.is_amd_compatible;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5d4c86133453..1acd97c6fa53 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2354,50 +2354,6 @@ setup_syscalls_segments(struct desc_struct *cs, struct desc_struct *ss)
ss->avl = 0;
}
-static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
-{
- u32 eax, ebx, ecx, edx;
-
- eax = ecx = 0;
- ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
- return is_guest_vendor_intel(ebx, ecx, edx);
-}
-
-static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
-{
- const struct x86_emulate_ops *ops = ctxt->ops;
- u32 eax, ebx, ecx, edx;
-
- /*
- * syscall should always be enabled in longmode - so only become
- * vendor specific (cpuid) if other modes are active...
- */
- if (ctxt->mode == X86EMUL_MODE_PROT64)
- return true;
-
- eax = 0x00000000;
- ecx = 0x00000000;
- ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
- /*
- * remark: Intel CPUs only support "syscall" in 64bit longmode. Also a
- * 64bit guest with a 32bit compat-app running will #UD !! While this
- * behaviour can be fixed (by emulating) into AMD response - CPUs of
- * AMD can't behave like Intel.
- */
- if (is_guest_vendor_intel(ebx, ecx, edx))
- return false;
-
- if (is_guest_vendor_amd(ebx, ecx, edx) ||
- is_guest_vendor_hygon(ebx, ecx, edx))
- return true;
-
- /*
- * default: (not Intel, not AMD, not Hygon), apply Intel's
- * stricter rules...
- */
- return false;
-}
-
static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
@@ -2411,7 +2367,15 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->mode == X86EMUL_MODE_VM86)
return emulate_ud(ctxt);
- if (!(em_syscall_is_enabled(ctxt)))
+ /*
+ * Intel compatible CPUs only support SYSCALL in 64-bit mode, whereas
+ * AMD allows SYSCALL in any flavor of protected mode. Note, it's
+ * infeasible to emulate Intel behavior when running on AMD hardware,
+ * as SYSCALL won't fault in the "wrong" mode, i.e. there is no #UD
+ * for KVM to trap-and-emulate, unlike emulating AMD on Intel.
+ */
+ if (ctxt->mode != X86EMUL_MODE_PROT64 &&
+ ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
return emulate_ud(ctxt);
ops->get_msr(ctxt, MSR_EFER, &efer);
@@ -2471,11 +2435,11 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
return emulate_gp(ctxt, 0);
/*
- * Not recognized on AMD in compat mode (but is recognized in legacy
- * mode).
+ * Intel's architecture allows SYSENTER in compatibility mode, but AMD
+ * does not. Note, AMD does allow SYSENTER in legacy protected mode.
*/
- if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA)
- && !vendor_intel(ctxt))
+ if ((ctxt->mode != X86EMUL_MODE_PROT64) && (efer & EFER_LMA) &&
+ !ctxt->ops->guest_cpuid_is_intel_compatible(ctxt))
return emulate_ud(ctxt);
/* sysenter/sysexit have not been tested in 64bit mode. */
@@ -2647,7 +2611,14 @@ static void string_registers_quirk(struct x86_emulate_ctxt *ctxt)
* manner when ECX is zero due to REP-string optimizations.
*/
#ifdef CONFIG_X86_64
- if (ctxt->ad_bytes != 4 || !vendor_intel(ctxt))
+ u32 eax, ebx, ecx, edx;
+
+ if (ctxt->ad_bytes != 4)
+ return;
+
+ eax = ecx = 0;
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx, true);
+ if (!is_guest_vendor_intel(ebx, ecx, edx))
return;
*reg_write(ctxt, VCPU_REGS_RCX) = 0;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 8a47f8541eab..5c31e715d2ad 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1737,7 +1737,8 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
break;
case HV_X64_MSR_APIC_FREQUENCY:
- data = APIC_BUS_FREQUENCY;
+ data = div64_u64(1000000000ULL,
+ vcpu->kvm->arch.apic_bus_cycle_ns);
break;
default:
kvm_pr_unimpl_rdmsr(vcpu, msr);
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 29ea4313e1bb..55a18e2f2dcd 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -223,6 +223,7 @@ struct x86_emulate_ops {
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
+ bool (*guest_cpuid_is_intel_compatible)(struct x86_emulate_ctxt *ctxt);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index acd7d48100a1..4e44c267959a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1557,7 +1557,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
remaining = 0;
ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
- return div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->divide_count));
+ return div64_u64(ns, (apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+ apic->divide_count));
}
static void __report_tpr_access(struct kvm_lapic *apic, bool write)
@@ -1973,7 +1974,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
{
- return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
+ return (u64)tmict * apic->vcpu->kvm->arch.apic_bus_cycle_ns *
+ (u64)apic->divide_count;
}
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a69e706b9080..9030778e3077 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,8 +16,7 @@
#define APIC_DEST_NOSHORT 0x0
#define APIC_DEST_MASK 0x800
-#define APIC_BUS_CYCLE_NS 1
-#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
+#define APIC_BUS_CYCLE_NS_DEFAULT 1
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index dc80e72e4848..f2e7e5c9b9ef 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -57,12 +57,6 @@ static __always_inline u64 rsvd_bits(int s, int e)
return ((2ULL << (e - s)) - 1) << s;
}
-/*
- * The number of non-reserved physical address bits irrespective of features
- * that repurpose legal bits, e.g. MKTME.
- */
-extern u8 __read_mostly shadow_phys_bits;
-
static inline gfn_t kvm_mmu_max_gfn(void)
{
/*
@@ -76,30 +70,11 @@ static inline gfn_t kvm_mmu_max_gfn(void)
* than hardware's real MAXPHYADDR. Using the host MAXPHYADDR
* disallows such SPTEs entirely and simplifies the TDP MMU.
*/
- int max_gpa_bits = likely(tdp_enabled) ? shadow_phys_bits : 52;
+ int max_gpa_bits = likely(tdp_enabled) ? kvm_host.maxphyaddr : 52;
return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1;
}
-static inline u8 kvm_get_shadow_phys_bits(void)
-{
- /*
- * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
- * in CPU detection code, but the processor treats those reduced bits as
- * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
- * the physical address bits reported by CPUID.
- */
- if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
- return cpuid_eax(0x80000008) & 0xff;
-
- /*
- * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
- * custom CPUID. Proceed with whatever the kernel found since these features
- * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
- */
- return boot_cpu_data.x86_phys_bits;
-}
-
u8 kvm_mmu_get_max_tdp_level(void);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4e0e9963066f..44ae8455f91e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5109,7 +5109,7 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
static inline u64 reserved_hpa_bits(void)
{
- return rsvd_bits(shadow_phys_bits, 63);
+ return rsvd_bits(kvm_host.maxphyaddr, 63);
}
/*
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 59cac37615b6..8275b96b6596 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -43,7 +43,25 @@ u64 __read_mostly shadow_acc_track_mask;
u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
-u8 __read_mostly shadow_phys_bits;
+static u8 __init kvm_get_host_maxphyaddr(void)
+{
+ /*
+ * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+ * in CPU detection code, but the processor treats those reduced bits as
+ * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+ * the physical address bits reported by CPUID, i.e. the raw MAXPHYADDR,
+ * when reasoning about CPU behavior with respect to MAXPHYADDR.
+ */
+ if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+ return cpuid_eax(0x80000008) & 0xff;
+
+ /*
+ * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+ * custom CPUID. Proceed with whatever the kernel found since these features
+ * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+ */
+ return boot_cpu_data.x86_phys_bits;
+}
void __init kvm_mmu_spte_module_init(void)
{
@@ -55,6 +73,8 @@ void __init kvm_mmu_spte_module_init(void)
* will change when the vendor module is (re)loaded.
*/
allow_mmio_caching = enable_mmio_caching;
+
+ kvm_host.maxphyaddr = kvm_get_host_maxphyaddr();
}
static u64 generation_mmio_spte_mask(u64 gen)
@@ -441,8 +461,6 @@ void kvm_mmu_reset_all_pte_masks(void)
u8 low_phys_bits;
u64 mask;
- shadow_phys_bits = kvm_get_shadow_phys_bits();
-
/*
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
@@ -494,7 +512,7 @@ void kvm_mmu_reset_all_pte_masks(void)
* 52-bit physical addresses then there are no reserved PA bits in the
* PTEs and so the reserved PA approach must be disabled.
*/
- if (shadow_phys_bits < 52)
+ if (kvm_host.maxphyaddr < 52)
mask = BIT_ULL(51) | PT_PRESENT_MASK;
else
mask = 0;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f9149c9fc275..7f0fdbc8240f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -194,7 +194,7 @@ static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
attr.sample_period = get_sample_period(pmc, pmc->counter);
if ((attr.config & HSW_IN_TX_CHECKPOINTED) &&
- guest_cpuid_is_intel(pmc->vcpu)) {
+ (boot_cpu_has(X86_FEATURE_RTM) || boot_cpu_has(X86_FEATURE_HLE))) {
/*
* HSW_IN_TX_CHECKPOINTED is not supported with nonzero
* period. Just clear the sample period so at least
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index df8818759698..8cb439a5c77a 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -4401,9 +4401,9 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_are
* isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
* by common SVM code).
*/
- hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ hostsa->xcr0 = kvm_host.xcr0;
hostsa->pkru = read_pkru();
- hostsa->xss = host_xss;
+ hostsa->xss = kvm_host.xss;
/*
* If DebugSwap is enabled, debug registers are loaded but NOT saved by
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 80ac33a5effe..f011849e7ef8 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -53,6 +53,7 @@
#include "svm_onhyperv.h"
MODULE_AUTHOR("Qumranet");
+MODULE_DESCRIPTION("KVM support for SVM (AMD-V) extensions");
MODULE_LICENSE("GPL");
#ifdef MODULE
@@ -1202,7 +1203,7 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (guest_cpuid_is_intel(vcpu)) {
+ if (guest_cpuid_is_intel_compatible(vcpu)) {
/*
* We must intercept SYSENTER_EIP and SYSENTER_ESP
* accesses because the processor only stores 32 bits.
@@ -2890,12 +2891,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SYSENTER_EIP:
msr_info->data = (u32)svm->vmcb01.ptr->save.sysenter_eip;
- if (guest_cpuid_is_intel(vcpu))
+ if (guest_cpuid_is_intel_compatible(vcpu))
msr_info->data |= (u64)svm->sysenter_eip_hi << 32;
break;
case MSR_IA32_SYSENTER_ESP:
msr_info->data = svm->vmcb01.ptr->save.sysenter_esp;
- if (guest_cpuid_is_intel(vcpu))
+ if (guest_cpuid_is_intel_compatible(vcpu))
msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
break;
case MSR_TSC_AUX:
@@ -3122,11 +3123,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
* 32 bit part of these msrs to support Intel's
* implementation of SYSENTER/SYSEXIT.
*/
- svm->sysenter_eip_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
+ svm->sysenter_eip_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
break;
case MSR_IA32_SYSENTER_ESP:
svm->vmcb01.ptr->save.sysenter_esp = (u32)data;
- svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
+ svm->sysenter_esp_hi = guest_cpuid_is_intel_compatible(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
/*
@@ -4387,11 +4388,11 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
/*
- * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+ * Intercept VMLOAD if the vCPU model is Intel in order to emulate that
* VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
* SVM on Intel is bonkers and extremely unlikely to work).
*/
- if (!guest_cpuid_is_intel(vcpu))
+ if (!guest_cpuid_is_intel_compatible(vcpu))
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index d2397b98bbf0..0eb87bb6c5a8 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -668,7 +668,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
#define AVIC_REQUIRED_APICV_INHIBITS \
( \
- BIT(APICV_INHIBIT_REASON_DISABLE) | \
+ BIT(APICV_INHIBIT_REASON_DISABLED) | \
BIT(APICV_INHIBIT_REASON_ABSENT) | \
BIT(APICV_INHIBIT_REASON_HYPERV) | \
BIT(APICV_INHIBIT_REASON_NESTED) | \
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 10ad5d32fcc3..e30d01ef5a61 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1375,6 +1375,10 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
__entry->vcpu_id, __entry->timer_index)
);
+#define kvm_print_apicv_inhibit_reasons(inhibits) \
+ (inhibits), (inhibits) ? " " : "", \
+ (inhibits) ? __print_flags(inhibits, "|", APICV_INHIBIT_REASONS) : ""
+
TRACE_EVENT(kvm_apicv_inhibit_changed,
TP_PROTO(int reason, bool set, unsigned long inhibits),
TP_ARGS(reason, set, inhibits),
@@ -1391,9 +1395,10 @@ TRACE_EVENT(kvm_apicv_inhibit_changed,
__entry->inhibits = inhibits;
),
- TP_printk("%s reason=%u, inhibits=0x%lx",
+ TP_printk("%s reason=%u, inhibits=0x%lx%s%s",
__entry->set ? "set" : "cleared",
- __entry->reason, __entry->inhibits)
+ __entry->reason,
+ kvm_print_apicv_inhibit_reasons(__entry->inhibits))
);
TRACE_EVENT(kvm_apicv_accept_irq,
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index c7a86be0f30e..d8e2e853be51 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -8,7 +8,7 @@
#include "posted_intr.h"
#define VMX_REQUIRED_APICV_INHIBITS \
- (BIT(APICV_INHIBIT_REASON_DISABLE)| \
+ (BIT(APICV_INHIBIT_REASON_DISABLED) | \
BIT(APICV_INHIBIT_REASON_ABSENT) | \
BIT(APICV_INHIBIT_REASON_HYPERV) | \
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 643935a0f70a..75b4f41d9926 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2425,7 +2425,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
if (cpu_has_load_ia32_efer()) {
if (guest_efer & EFER_LMA)
exec_control |= VM_ENTRY_IA32E_MODE;
- if (guest_efer != host_efer)
+ if (guest_efer != kvm_host.efer)
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
}
vm_entry_controls_set(vmx, exec_control);
@@ -2438,7 +2438,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
* bits may be modified by vmx_set_efer() in prepare_vmcs02().
*/
exec_control = __vm_exit_controls_get(vmcs01);
- if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
+ if (cpu_has_load_ia32_efer() && guest_efer != kvm_host.efer)
exec_control |= VM_EXIT_LOAD_IA32_EFER;
else
exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
@@ -4665,7 +4665,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
return vmcs_read64(GUEST_IA32_EFER);
if (cpu_has_load_ia32_efer())
- return host_efer;
+ return kvm_host.efer;
for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
@@ -4676,7 +4676,7 @@ static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
if (efer_msr)
return efer_msr->data;
- return host_efer;
+ return kvm_host.efer;
}
static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 078386582782..bedb9ba96918 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -74,6 +74,7 @@
#include "posted_intr.h"
MODULE_AUTHOR("Qumranet");
+MODULE_DESCRIPTION("KVM support for VMX (Intel VT-x) extensions");
MODULE_LICENSE("GPL");
#ifdef MODULE
@@ -259,7 +260,7 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
return 0;
}
- if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+ if (kvm_host.arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
return 0;
}
@@ -404,7 +405,7 @@ static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
* and VM-Exit.
*/
vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
- (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ (kvm_host.arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
@@ -1123,12 +1124,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
* atomically, since it's faster than switching it manually.
*/
if (cpu_has_load_ia32_efer() ||
- (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
+ (enable_ept && ((vmx->vcpu.arch.efer ^ kvm_host.efer) & EFER_NX))) {
if (!(guest_efer & EFER_LMA))
guest_efer &= ~EFER_LME;
- if (guest_efer != host_efer)
+ if (guest_efer != kvm_host.efer)
add_atomic_switch_msr(vmx, MSR_EFER,
- guest_efer, host_efer, false);
+ guest_efer, kvm_host.efer, false);
else
clear_atomic_switch_msr(vmx, MSR_EFER);
return false;
@@ -1141,7 +1142,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
clear_atomic_switch_msr(vmx, MSR_EFER);
guest_efer &= ~ignore_bits;
- guest_efer |= host_efer & ignore_bits;
+ guest_efer |= kvm_host.efer & ignore_bits;
vmx->guest_uret_msrs[i].data = guest_efer;
vmx->guest_uret_msrs[i].mask = ~ignore_bits;
@@ -4392,7 +4393,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
}
if (cpu_has_load_ia32_efer())
- vmcs_write64(HOST_IA32_EFER, host_efer);
+ vmcs_write64(HOST_IA32_EFER, kvm_host.efer);
}
void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
@@ -8394,18 +8395,16 @@ static void __init vmx_setup_me_spte_mask(void)
u64 me_mask = 0;
/*
- * kvm_get_shadow_phys_bits() returns shadow_phys_bits. Use
- * the former to avoid exposing shadow_phys_bits.
- *
* On pre-MKTME system, boot_cpu_data.x86_phys_bits equals to
- * shadow_phys_bits. On MKTME and/or TDX capable systems,
+ * kvm_host.maxphyaddr. On MKTME and/or TDX capable systems,
* boot_cpu_data.x86_phys_bits holds the actual physical address
- * w/o the KeyID bits, and shadow_phys_bits equals to MAXPHYADDR
- * reported by CPUID. Those bits between are KeyID bits.
+ * w/o the KeyID bits, and kvm_host.maxphyaddr equals to
+ * MAXPHYADDR reported by CPUID. Those bits between are KeyID bits.
*/
- if (boot_cpu_data.x86_phys_bits != kvm_get_shadow_phys_bits())
+ if (boot_cpu_data.x86_phys_bits != kvm_host.maxphyaddr)
me_mask = rsvd_bits(boot_cpu_data.x86_phys_bits,
- kvm_get_shadow_phys_bits() - 1);
+ kvm_host.maxphyaddr - 1);
+
/*
* Unlike SME, host kernel doesn't support setting up any
* MKTME KeyID on Intel platforms. No memory encryption
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 7b64e271a931..08d7d67fe760 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -727,7 +727,7 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
return true;
return allow_smaller_maxphyaddr &&
- cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits();
+ cpuid_maxphyaddr(vcpu) < kvm_host.maxphyaddr;
}
static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 3cb0be94e779..6016883c8533 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -46,7 +46,6 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu);
void vmx_migrate_timers(struct kvm_vcpu *vcpu);
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
-bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr);
void vmx_hwapic_isr_update(int max_isr);
bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu);
@@ -111,7 +110,6 @@ u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
void vmx_write_tsc_offset(struct kvm_vcpu *vcpu);
void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu);
-void vmx_request_immediate_exit(struct kvm_vcpu *vcpu);
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
#ifdef CONFIG_X86_64
int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e8c34f8e1964..e90e1a74564e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -100,6 +100,9 @@
struct kvm_caps kvm_caps __read_mostly;
EXPORT_SYMBOL_GPL(kvm_caps);
+struct kvm_host_values kvm_host __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_host);
+
#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e))
#define emul_to_vcpu(ctxt) \
@@ -220,21 +223,12 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
-u64 __read_mostly host_efer;
-EXPORT_SYMBOL_GPL(host_efer);
-
bool __read_mostly allow_smaller_maxphyaddr = 0;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
bool __read_mostly enable_apicv = true;
EXPORT_SYMBOL_GPL(enable_apicv);
-u64 __read_mostly host_xss;
-EXPORT_SYMBOL_GPL(host_xss);
-
-u64 __read_mostly host_arch_capabilities;
-EXPORT_SYMBOL_GPL(host_arch_capabilities);
-
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@ -308,8 +302,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
-u64 __read_mostly host_xcr0;
-
static struct kmem_cache *x86_emulator_cache;
/*
@@ -1016,11 +1008,11 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
- if (vcpu->arch.xcr0 != host_xcr0)
+ if (vcpu->arch.xcr0 != kvm_host.xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
- vcpu->arch.ia32_xss != host_xss)
+ vcpu->arch.ia32_xss != kvm_host.xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
@@ -1047,12 +1039,12 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
- if (vcpu->arch.xcr0 != host_xcr0)
- xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
+ if (vcpu->arch.xcr0 != kvm_host.xcr0)
+ xsetbv(XCR_XFEATURE_ENABLED_MASK, kvm_host.xcr0);
if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
- vcpu->arch.ia32_xss != host_xss)
- wrmsrl(MSR_IA32_XSS, host_xss);
+ vcpu->arch.ia32_xss != kvm_host.xss)
+ wrmsrl(MSR_IA32_XSS, kvm_host.xss);
}
}
@@ -1619,7 +1611,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
static u64 kvm_get_arch_capabilities(void)
{
- u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
+ u64 data = kvm_host.arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
@@ -1877,11 +1869,11 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
* incomplete and conflicting architectural behavior. Current
* AMD CPUs completely ignore bits 63:32, i.e. they aren't
* reserved and always read as zeros. Enforce Intel's reserved
- * bits check if and only if the guest CPU is Intel, and clear
- * the bits in all other cases. This ensures cross-vendor
- * migration will provide consistent behavior for the guest.
+ * bits check if the guest CPU is Intel compatible, otherwise
+ * clear the bits. This ensures cross-vendor migration will
+ * provide consistent behavior for the guest.
*/
- if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+ if (guest_cpuid_is_intel_compatible(vcpu) && (data >> 32) != 0)
return 1;
data = (u32)data;
@@ -4703,11 +4695,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES:
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_MEMORY_FAULT_INFO:
+ case KVM_CAP_X86_GUEST_MODE:
r = 1;
break;
case KVM_CAP_PRE_FAULT_MEMORY:
r = tdp_enabled;
break;
+ case KVM_CAP_X86_APIC_BUS_CYCLES_NS:
+ r = APIC_BUS_CYCLE_NS_DEFAULT;
+ break;
case KVM_CAP_EXIT_HYPERCALL:
r = KVM_EXIT_HYPERCALL_VALID_MASK;
break;
@@ -5891,8 +5887,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
goto out;
- u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
- GFP_KERNEL_ACCOUNT);
+ u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
r = -ENOMEM;
if (!u.lapic)
@@ -6085,7 +6080,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (vcpu->arch.guest_fpu.uabi_size > sizeof(struct kvm_xsave))
break;
- u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
+ u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
r = -ENOMEM;
if (!u.xsave)
break;
@@ -6116,7 +6111,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_XSAVE2: {
int size = vcpu->arch.guest_fpu.uabi_size;
- u.xsave = kzalloc(size, GFP_KERNEL_ACCOUNT);
+ u.xsave = kzalloc(size, GFP_KERNEL);
r = -ENOMEM;
if (!u.xsave)
break;
@@ -6134,7 +6129,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_GET_XCRS: {
- u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
+ u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
r = -ENOMEM;
if (!u.xcrs)
break;
@@ -6756,6 +6751,30 @@ split_irqchip_unlock:
}
mutex_unlock(&kvm->lock);
break;
+ case KVM_CAP_X86_APIC_BUS_CYCLES_NS: {
+ u64 bus_cycle_ns = cap->args[0];
+ u64 unused;
+
+ /*
+ * Guard against overflow in tmict_to_ns(). 128 is the highest
+ * divide value that can be programmed in APIC_TDCR.
+ */
+ r = -EINVAL;
+ if (!bus_cycle_ns ||
+ check_mul_overflow((u64)U32_MAX * 128, bus_cycle_ns, &unused))
+ break;
+
+ r = 0;
+ mutex_lock(&kvm->lock);
+ if (!irqchip_in_kernel(kvm))
+ r = -ENXIO;
+ else if (kvm->created_vcpus)
+ r = -EINVAL;
+ else
+ kvm->arch.apic_bus_cycle_ns = bus_cycle_ns;
+ mutex_unlock(&kvm->lock);
+ break;
+ }
default:
r = -EINVAL;
break;
@@ -8535,6 +8554,11 @@ static bool emulator_guest_has_rdpid(struct x86_emulate_ctxt *ctxt)
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_RDPID);
}
+static bool emulator_guest_cpuid_is_intel_compatible(struct x86_emulate_ctxt *ctxt)
+{
+ return guest_cpuid_is_intel_compatible(emul_to_vcpu(ctxt));
+}
+
static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
{
return kvm_register_read_raw(emul_to_vcpu(ctxt), reg);
@@ -8633,6 +8657,7 @@ static const struct x86_emulate_ops emulate_ops = {
.guest_has_movbe = emulator_guest_has_movbe,
.guest_has_fxsr = emulator_guest_has_fxsr,
.guest_has_rdpid = emulator_guest_has_rdpid,
+ .guest_cpuid_is_intel_compatible = emulator_guest_cpuid_is_intel_compatible,
.set_nmi_mask = emulator_set_nmi_mask,
.is_smm = emulator_is_smm,
.is_guest_mode = emulator_is_guest_mode,
@@ -9014,19 +9039,17 @@ EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
static bool kvm_is_code_breakpoint_inhibited(struct kvm_vcpu *vcpu)
{
- u32 shadow;
-
if (kvm_get_rflags(vcpu) & X86_EFLAGS_RF)
return true;
/*
- * Intel CPUs inhibit code #DBs when MOV/POP SS blocking is active,
- * but AMD CPUs do not. MOV/POP SS blocking is rare, check that first
- * to avoid the relatively expensive CPUID lookup.
+ * Intel compatible CPUs inhibit code #DBs when MOV/POP SS blocking is
+ * active, but AMD compatible CPUs do not.
*/
- shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
- return (shadow & KVM_X86_SHADOW_INT_MOV_SS) &&
- guest_cpuid_is_intel(vcpu);
+ if (!guest_cpuid_is_intel_compatible(vcpu))
+ return false;
+
+ return static_call(kvm_x86_get_interrupt_shadow)(vcpu) & KVM_X86_SHADOW_INT_MOV_SS;
}
static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu,
@@ -9786,19 +9809,19 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
- host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
- kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
+ kvm_host.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ kvm_caps.supported_xcr0 = kvm_host.xcr0 & KVM_SUPPORTED_XCR0;
}
- rdmsrl_safe(MSR_EFER, &host_efer);
+ rdmsrl_safe(MSR_EFER, &kvm_host.efer);
if (boot_cpu_has(X86_FEATURE_XSAVES))
- rdmsrl(MSR_IA32_XSS, host_xss);
+ rdmsrl(MSR_IA32_XSS, kvm_host.xss);
kvm_init_pmu_capability(ops->pmu_ops);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, kvm_host.arch_capabilities);
r = ops->hardware_setup();
if (r != 0)
@@ -10023,6 +10046,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_apicv_activated);
static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
enum kvm_apicv_inhibit reason, bool set)
{
+ const struct trace_print_flags apicv_inhibits[] = { APICV_INHIBIT_REASONS };
+
+ BUILD_BUG_ON(ARRAY_SIZE(apicv_inhibits) != NR_APICV_INHIBIT_REASONS);
+
if (set)
__set_bit(reason, inhibits);
else
@@ -10034,7 +10061,7 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
static void kvm_apicv_init(struct kvm *kvm)
{
enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
- APICV_INHIBIT_REASON_DISABLE;
+ APICV_INHIBIT_REASON_DISABLED;
set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
@@ -10255,6 +10282,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
if (is_smm(vcpu))
kvm_run->flags |= KVM_RUN_X86_SMM;
+ if (is_guest_mode(vcpu))
+ kvm_run->flags |= KVM_RUN_X86_GUEST_MODE;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -12629,6 +12658,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
kvm->arch.default_tsc_khz = max_tsc_khz ? : tsc_khz;
+ kvm->arch.apic_bus_cycle_ns = APIC_BUS_CYCLE_NS_DEFAULT;
kvm->arch.guest_can_read_msr_platform_info = true;
kvm->arch.enable_pmu = enable_pmu;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d80a4c6b5a38..a88c65d3ea26 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -33,6 +33,20 @@ struct kvm_caps {
u64 supported_perf_cap;
};
+struct kvm_host_values {
+ /*
+ * The host's raw MAXPHYADDR, i.e. the number of non-reserved physical
+ * address bits irrespective of features that repurpose legal bits,
+ * e.g. MKTME.
+ */
+ u8 maxphyaddr;
+
+ u64 efer;
+ u64 xcr0;
+ u64 xss;
+ u64 arch_capabilities;
+};
+
void kvm_spurious_fault(void);
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
@@ -325,11 +339,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len);
fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu);
-extern u64 host_xcr0;
-extern u64 host_xss;
-extern u64 host_arch_capabilities;
-
extern struct kvm_caps kvm_caps;
+extern struct kvm_host_values kvm_host;
extern bool enable_pmu;