aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/Kconfig11
-rw-r--r--arch/x86/kvm/hyperv.h1
-rw-r--r--arch/x86/kvm/lapic.c24
-rw-r--r--arch/x86/kvm/mmu/mmu.c19
-rw-r--r--arch/x86/kvm/mmu/spte.c6
-rw-r--r--arch/x86/kvm/mmu/spte.h2
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c8
-rw-r--r--arch/x86/kvm/svm/sev.c24
-rw-r--r--arch/x86/kvm/svm/svm.c16
-rw-r--r--arch/x86/kvm/vmx/vmx.c10
-rw-r--r--arch/x86/kvm/x86.c24
11 files changed, 86 insertions, 59 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4287a8071a3a..730c2f34d347 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HIGH_RES_TIMERS
depends on X86_LOCAL_APIC
select KVM_COMMON
select KVM_GENERIC_MMU_NOTIFIER
@@ -141,11 +140,13 @@ config KVM_AMD_SEV
depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
select ARCH_HAS_CC_PLATFORM
select KVM_GENERIC_PRIVATE_MEM
- select HAVE_KVM_GMEM_PREPARE
- select HAVE_KVM_GMEM_INVALIDATE
+ select HAVE_KVM_ARCH_GMEM_PREPARE
+ select HAVE_KVM_ARCH_GMEM_INVALIDATE
help
- Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
- with Encrypted State (SEV-ES) on AMD processors.
+ Provides support for launching encrypted VMs which use Secure
+ Encrypted Virtualization (SEV), Secure Encrypted Virtualization with
+ Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
+ Secure Nested Paging (SEV-SNP) technologies on AMD processors.
config KVM_SMM
bool "System Management Mode emulation"
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 923e64903da9..913bfc96959c 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
return HV_STATUS_ACCESS_DENIED;
}
static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {}
-static inline void kvm_hv_free_pa_page(struct kvm *kvm) {}
static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
{
return false;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a7172ba59ad2..5bb481aefcbc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
* reversing the LDR calculation to get cluster of APICs, i.e. no
* additional work is required.
*/
- if (apic_x2apic_mode(apic)) {
- WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+ if (apic_x2apic_mode(apic))
return;
- }
if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
&cluster, &mask))) {
@@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
s64 min_period = min_timer_period_us * 1000LL;
if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
+ pr_info_once(
"vcpu %i: requested %lld ns "
"lapic timer period limited to %lld ns\n",
apic->vcpu->vcpu_id,
@@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s, bool set)
{
if (apic_x2apic_mode(vcpu->arch.apic)) {
+ u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
u32 *id = (u32 *)(s->regs + APIC_ID);
u32 *ldr = (u32 *)(s->regs + APIC_LDR);
u64 icr;
if (vcpu->kvm->arch.x2apic_format) {
- if (*id != vcpu->vcpu_id)
+ if (*id != x2apic_id)
return -EINVAL;
} else {
+ /*
+ * Ignore the userspace value when setting APIC state.
+ * KVM's model is that the x2APIC ID is readonly, e.g.
+ * KVM only supports delivering interrupts to KVM's
+ * version of the x2APIC ID. However, for backwards
+ * compatibility, don't reject attempts to set a
+ * mismatched ID for userspace that hasn't opted into
+ * x2apic_format.
+ */
if (set)
- *id >>= 24;
+ *id = x2apic_id;
else
- *id <<= 24;
+ *id = x2apic_id << 24;
}
/*
@@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
* split to ICR+ICR2 in userspace for backwards compatibility.
*/
if (set) {
- *ldr = kvm_apic_calc_x2apic_ldr(*id);
+ *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
(u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 901be9e420a4..7813d28b082f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
if (req_max_level)
max_level = min(max_level, req_max_level);
- return req_max_level;
+ return max_level;
}
static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
@@ -4674,16 +4674,14 @@ out_unlock:
bool kvm_mmu_may_ignore_guest_pat(void)
{
/*
- * When EPT is enabled (shadow_memtype_mask is non-zero), the CPU does
- * not support self-snoop (or is affected by an erratum), and the VM
+ * When EPT is enabled (shadow_memtype_mask is non-zero), and the VM
* has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
* honor the memtype from the guest's PAT so that guest accesses to
* memory that is DMA'd aren't cached against the guest's wishes. As a
* result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
- * KVM _always_ ignores or honors guest PAT, i.e. doesn't toggle SPTE
- * bits in response to non-coherent device (un)registration.
+ * KVM _always_ ignores guest PAT (when EPT is enabled).
*/
- return !static_cpu_has(X86_FEATURE_SELFSNOOP) && shadow_memtype_mask;
+ return shadow_memtype_mask;
}
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4743,11 +4741,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
u64 end;
int r;
+ if (!vcpu->kvm->arch.pre_fault_allowed)
+ return -EOPNOTSUPP;
+
/*
* reload is efficient when called repeatedly, so we can do it on
* every iteration.
*/
- kvm_mmu_reload(vcpu);
+ r = kvm_mmu_reload(vcpu);
+ if (r)
+ return r;
if (kvm_arch_has_private_mem(vcpu->kvm) &&
kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
@@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
if (level == PG_LEVEL_2M)
- return kvm_range_has_memory_attributes(kvm, start, end, attrs);
+ return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
if (hugepage_test_mixed(slot, gfn, level - 1) ||
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index d4527965e48c..8f7eb3ad88fc 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -391,9 +391,9 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
mmio_value = 0;
/*
- * The masked MMIO value must obviously match itself and a removed SPTE
- * must not get a false positive. Removed SPTEs and MMIO SPTEs should
- * never collide as MMIO must set some RWX bits, and removed SPTEs must
+ * The masked MMIO value must obviously match itself and a frozen SPTE
+ * must not get a false positive. Frozen SPTEs and MMIO SPTEs should
+ * never collide as MMIO must set some RWX bits, and frozen SPTEs must
* not set any RWX bits.
*/
if (WARN_ON((mmio_value & mmio_mask) != mmio_value) ||
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index ef793c459b05..2cb816ea2430 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -214,7 +214,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
*/
#define FROZEN_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
-/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
+/* Frozen SPTEs must not be misconstrued as shadow present PTEs. */
static_assert(!(FROZEN_SPTE & SPTE_MMU_PRESENT_MASK));
static inline bool is_frozen_spte(u64 spte)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c7dc49ee7388..3c55955bcaf8 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -359,10 +359,10 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
/*
* Set the SPTE to a nonpresent value that other
* threads will not overwrite. If the SPTE was
- * already marked as removed then another thread
+ * already marked as frozen then another thread
* handling a page fault could overwrite it, so
* set the SPTE until it is set from some other
- * value to the removed SPTE value.
+ * value to the frozen SPTE value.
*/
for (;;) {
old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, FROZEN_SPTE);
@@ -536,8 +536,8 @@ static inline int __must_check __tdp_mmu_set_spte_atomic(struct tdp_iter *iter,
u64 *sptep = rcu_dereference(iter->sptep);
/*
- * The caller is responsible for ensuring the old SPTE is not a REMOVED
- * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+ * The caller is responsible for ensuring the old SPTE is not a FROZEN
+ * SPTE. KVM should never attempt to zap or manipulate a FROZEN SPTE,
* and pre-checking before inserting a new SPTE is advantageous as it
* avoids unnecessary work.
*/
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a16c873b3232..714c517dd4b7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2276,30 +2276,24 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) {
struct sev_data_snp_launch_update fw_args = {0};
- bool assigned;
+ bool assigned = false;
int level;
- if (!kvm_mem_is_private(kvm, gfn)) {
- pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
- __func__, gfn);
- ret = -EINVAL;
- goto err;
- }
-
ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
if (ret || assigned) {
pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
__func__, gfn, ret, assigned);
- ret = -EINVAL;
+ ret = ret ? -EINVAL : -EEXIST;
goto err;
}
if (src) {
void *vaddr = kmap_local_pfn(pfn + i);
- ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE);
- if (ret)
+ if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) {
+ ret = -EFAULT;
goto err;
+ }
kunmap_local(vaddr);
}
@@ -2549,6 +2543,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
data->gctx_paddr = __psp_pa(sev->snp_context);
ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
+ /*
+ * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
+ * can be given to the guest simply by marking the RMP entry as private.
+ * This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
+ */
+ if (!ret)
+ kvm->arch.pre_fault_allowed = true;
+
kfree(id_auth);
e_free_id_block:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c115d26844f7..5ab2c92c7331 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2876,6 +2876,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_CSTAR:
msr_info->data = svm->vmcb01.ptr->save.cstar;
break;
+ case MSR_GS_BASE:
+ msr_info->data = svm->vmcb01.ptr->save.gs.base;
+ break;
+ case MSR_FS_BASE:
+ msr_info->data = svm->vmcb01.ptr->save.fs.base;
+ break;
case MSR_KERNEL_GS_BASE:
msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
break;
@@ -3101,6 +3107,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_CSTAR:
svm->vmcb01.ptr->save.cstar = data;
break;
+ case MSR_GS_BASE:
+ svm->vmcb01.ptr->save.gs.base = data;
+ break;
+ case MSR_FS_BASE:
+ svm->vmcb01.ptr->save.fs.base = data;
+ break;
case MSR_KERNEL_GS_BASE:
svm->vmcb01.ptr->save.kernel_gs_base = data;
break;
@@ -4949,6 +4961,7 @@ static int svm_vm_init(struct kvm *kvm)
to_kvm_sev_info(kvm)->need_init = true;
kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
+ kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
}
if (!pause_filter_count || !pause_filter_thresh)
@@ -5223,6 +5236,9 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
+
+ /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f18c2d8c7476..733a0c45d1a6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7659,13 +7659,11 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
/*
* Force WB and ignore guest PAT if the VM does NOT have a non-coherent
- * device attached and the CPU doesn't support self-snoop. Letting the
- * guest control memory types on Intel CPUs without self-snoop may
- * result in unexpected behavior, and so KVM's (historical) ABI is to
- * trust the guest to behave only as a last resort.
+ * device attached. Letting the guest control memory types on Intel
+ * CPUs may result in unexpected behavior, and so KVM's ABI is to trust
+ * the guest to behave only as a last resort.
*/
- if (!static_cpu_has(X86_FEATURE_SELFSNOOP) &&
- !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af6c8cf6a37a..c983c8e434b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void)
int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
{
- unsigned int cpu = smp_processor_id();
- struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
int err;
value = (value & mask) | (msrs->values[slot].host & ~mask);
@@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
static void drop_user_return_notifiers(void)
{
- unsigned int cpu = smp_processor_id();
- struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+ struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
if (msrs->registered)
kvm_on_user_return(&msrs->urn);
@@ -4658,7 +4656,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ASYNC_PF_INT:
case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_KVMCLOCK_CTRL:
- case KVM_CAP_READONLY_MEM:
case KVM_CAP_IOAPIC_POLARITY_IGNORED:
case KVM_CAP_TSC_DEADLINE_TIMER:
case KVM_CAP_DISABLE_QUIRKS:
@@ -4817,6 +4814,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VM_TYPES:
r = kvm_caps.supported_vm_types;
break;
+ case KVM_CAP_READONLY_MEM:
+ r = kvm ? kvm_arch_has_readonly_mem(kvm) : 1;
+ break;
default:
break;
}
@@ -6042,7 +6042,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
break;
+ kvm_vcpu_srcu_read_lock(vcpu);
r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+ kvm_vcpu_srcu_read_unlock(vcpu);
break;
}
case KVM_GET_DEBUGREGS: {
@@ -12646,6 +12648,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.vm_type = type;
kvm->arch.has_private_mem =
(type == KVM_X86_SW_PROTECTED_VM);
+ /* Decided by the vendor code for other VM types. */
+ kvm->arch.pre_fault_allowed =
+ type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
ret = kvm_page_track_init(kvm);
if (ret)
@@ -13641,19 +13646,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
-{
- return kvm->arch.vm_type == KVM_X86_SNP_VM;
-}
-
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
{
return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);
}
#endif
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
{
kvm_x86_call(gmem_invalidate)(start, end);