Diffstat (limited to 'arch/x86/kvm')
 arch/x86/kvm/Kconfig       | 11
 arch/x86/kvm/hyperv.h      |  1
 arch/x86/kvm/lapic.c       | 24
 arch/x86/kvm/mmu/mmu.c     | 19
 arch/x86/kvm/mmu/spte.c    |  6
 arch/x86/kvm/mmu/spte.h    |  2
 arch/x86/kvm/mmu/tdp_mmu.c |  8
 arch/x86/kvm/svm/sev.c     | 24
 arch/x86/kvm/svm/svm.c     | 16
 arch/x86/kvm/vmx/vmx.c     | 10
 arch/x86/kvm/x86.c         | 24
 11 files changed, 86 insertions(+), 59 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 4287a8071a3a..730c2f34d347 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,6 @@ if VIRTUALIZATION
 
 config KVM
         tristate "Kernel-based Virtual Machine (KVM) support"
-        depends on HIGH_RES_TIMERS
         depends on X86_LOCAL_APIC
         select KVM_COMMON
         select KVM_GENERIC_MMU_NOTIFIER
@@ -141,11 +140,13 @@ config KVM_AMD_SEV
         depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
         select ARCH_HAS_CC_PLATFORM
         select KVM_GENERIC_PRIVATE_MEM
-        select HAVE_KVM_GMEM_PREPARE
-        select HAVE_KVM_GMEM_INVALIDATE
+        select HAVE_KVM_ARCH_GMEM_PREPARE
+        select HAVE_KVM_ARCH_GMEM_INVALIDATE
         help
-          Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
-          with Encrypted State (SEV-ES) on AMD processors.
+          Provides support for launching encrypted VMs which use Secure
+          Encrypted Virtualization (SEV), Secure Encrypted Virtualization with
+          Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
+          Secure Nested Paging (SEV-SNP) technologies on AMD processors.
 
 config KVM_SMM
         bool "System Management Mode emulation"
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 923e64903da9..913bfc96959c 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
         return HV_STATUS_ACCESS_DENIED;
 }
 static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {}
-static inline void kvm_hv_free_pa_page(struct kvm *kvm) {}
 static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector)
 {
         return false;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a7172ba59ad2..5bb481aefcbc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
          * reversing the LDR calculation to get cluster of APICs, i.e. no
          * additional work is required.
          */
-        if (apic_x2apic_mode(apic)) {
-                WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+        if (apic_x2apic_mode(apic))
                 return;
-        }
 
         if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
                                                         &cluster, &mask))) {
@@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
                 s64 min_period = min_timer_period_us * 1000LL;
 
                 if (apic->lapic_timer.period < min_period) {
-                        pr_info_ratelimited(
+                        pr_info_once(
                             "vcpu %i: requested %lld ns "
                             "lapic timer period limited to %lld ns\n",
                             apic->vcpu->vcpu_id,
@@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
                                 struct kvm_lapic_state *s, bool set)
 {
         if (apic_x2apic_mode(vcpu->arch.apic)) {
+                u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic);
                 u32 *id = (u32 *)(s->regs + APIC_ID);
                 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
                 u64 icr;
 
                 if (vcpu->kvm->arch.x2apic_format) {
-                        if (*id != vcpu->vcpu_id)
+                        if (*id != x2apic_id)
                                 return -EINVAL;
                 } else {
+                        /*
+                         * Ignore the userspace value when setting APIC state.
+                         * KVM's model is that the x2APIC ID is readonly, e.g.
+                         * KVM only supports delivering interrupts to KVM's
+                         * version of the x2APIC ID. However, for backwards
+                         * compatibility, don't reject attempts to set a
+                         * mismatched ID for userspace that hasn't opted into
+                         * x2apic_format.
+                         */
                         if (set)
-                                *id >>= 24;
+                                *id = x2apic_id;
                         else
-                                *id <<= 24;
+                                *id = x2apic_id << 24;
                 }
 
                 /*
@@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
                  * split to ICR+ICR2 in userspace for backwards compatibility.
                  */
                 if (set) {
-                        *ldr = kvm_apic_calc_x2apic_ldr(*id);
+                        *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id);
 
                         icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
                               (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
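With the lapic.c hunks above, the LDR that KVM writes back into the saved APIC state is always derived from KVM's own x2APIC ID rather than from the (now ignored) userspace-provided ID. For reference, the LDR is a pure function of that ID; a minimal sketch of the derivation, assuming the architectural x2APIC logical-ID layout (cluster number in bits 31:16, one-hot logical ID in bits 15:0) and mirroring what kvm_apic_calc_x2apic_ldr() computes:

/* Sketch only: how an x2APIC LDR is derived from an x2APIC ID. */
static inline u32 x2apic_ldr_from_id(u32 id)
{
        return ((id >> 4) << 16) | (1u << (id & 0xf));
}

Because this mapping is invertible, kvm_recalculate_logical_map() can recover the cluster by reversing it, which is why the x2APIC branch in the first hunk can simply return.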
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 901be9e420a4..7813d28b082f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
         if (req_max_level)
                 max_level = min(max_level, req_max_level);
 
-        return req_max_level;
+        return max_level;
 }
 
 static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
@@ -4674,16 +4674,14 @@ out_unlock:
 bool kvm_mmu_may_ignore_guest_pat(void)
 {
         /*
-         * When EPT is enabled (shadow_memtype_mask is non-zero), the CPU does
-         * not support self-snoop (or is affected by an erratum), and the VM
+         * When EPT is enabled (shadow_memtype_mask is non-zero), and the VM
          * has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is to
         * honor the memtype from the guest's PAT so that guest accesses to
         * memory that is DMA'd aren't cached against the guest's wishes. As a
         * result, KVM _may_ ignore guest PAT, whereas without non-coherent DMA,
-         * KVM _always_ ignores or honors guest PAT, i.e. doesn't toggle SPTE
-         * bits in response to non-coherent device (un)registration.
+         * KVM _always_ ignores guest PAT (when EPT is enabled).
          */
-        return !static_cpu_has(X86_FEATURE_SELFSNOOP) && shadow_memtype_mask;
+        return shadow_memtype_mask;
 }
 
 int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4743,11 +4741,16 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
         u64 end;
         int r;
 
+        if (!vcpu->kvm->arch.pre_fault_allowed)
+                return -EOPNOTSUPP;
+
         /*
          * reload is efficient when called repeatedly, so we can do it on
          * every iteration.
          */
-        kvm_mmu_reload(vcpu);
+        r = kvm_mmu_reload(vcpu);
+        if (r)
+                return r;
 
         if (kvm_arch_has_private_mem(vcpu->kvm) &&
             kvm_mem_is_private(vcpu->kvm, gpa_to_gfn(range->gpa)))
@@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
         const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
 
         if (level == PG_LEVEL_2M)
-                return kvm_range_has_memory_attributes(kvm, start, end, attrs);
+                return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
 
         for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
                 if (hugepage_test_mixed(slot, gfn, level - 1) ||
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index d4527965e48c..8f7eb3ad88fc 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -391,9 +391,9 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
                 mmio_value = 0;
 
         /*
-         * The masked MMIO value must obviously match itself and a removed SPTE
-         * must not get a false positive. Removed SPTEs and MMIO SPTEs should
-         * never collide as MMIO must set some RWX bits, and removed SPTEs must
+         * The masked MMIO value must obviously match itself and a frozen SPTE
+         * must not get a false positive. Frozen SPTEs and MMIO SPTEs should
+         * never collide as MMIO must set some RWX bits, and frozen SPTEs must
          * not set any RWX bits.
          */
         if (WARN_ON((mmio_value & mmio_mask) != mmio_value) ||
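The first mmu.c hunk is a plain bug fix: the helper computes a running minimum of the allowed private mapping level but then returned the vendor-requested ceiling, discarding the earlier clamp against the guest_memfd order. A stand-alone sketch of the corrected pattern (function and parameter names here are illustrative, not the kernel's):

#define min(a, b) ((a) < (b) ? (a) : (b))   /* stand-in for the kernel's min() */

/* Illustrative only: the fix is to return the running minimum, not the
 * vendor's requested ceiling. */
static int clamp_mapping_level(int max_level, int gmem_level, int req_max_level)
{
        max_level = min(max_level, gmem_level);            /* clamp to guest_memfd order  */
        if (req_max_level)                                 /* 0 means "no vendor limit"   */
                max_level = min(max_level, req_max_level); /* clamp to vendor's limit     */
        return max_level;                                  /* old code: return req_max_level */
}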
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index ef793c459b05..2cb816ea2430 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -214,7 +214,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  */
 #define FROZEN_SPTE	(SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
 
-/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
+/* Frozen SPTEs must not be misconstrued as shadow present PTEs. */
 static_assert(!(FROZEN_SPTE & SPTE_MMU_PRESENT_MASK));
 
 static inline bool is_frozen_spte(u64 spte)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c7dc49ee7388..3c55955bcaf8 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -359,10 +359,10 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
                         /*
                          * Set the SPTE to a nonpresent value that other
                          * threads will not overwrite. If the SPTE was
-                         * already marked as removed then another thread
+                         * already marked as frozen then another thread
                          * handling a page fault could overwrite it, so
                          * set the SPTE until it is set from some other
-                         * value to the removed SPTE value.
+                         * value to the frozen SPTE value.
                          */
                         for (;;) {
                                 old_spte = kvm_tdp_mmu_write_spte_atomic(sptep, FROZEN_SPTE);
@@ -536,8 +536,8 @@ static inline int __must_check __tdp_mmu_set_spte_atomic(struct tdp_iter *iter,
         u64 *sptep = rcu_dereference(iter->sptep);
 
         /*
-         * The caller is responsible for ensuring the old SPTE is not a REMOVED
-         * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+         * The caller is responsible for ensuring the old SPTE is not a FROZEN
+         * SPTE. KVM should never attempt to zap or manipulate a FROZEN SPTE,
          * and pre-checking before inserting a new SPTE is advantageous as it
          * avoids unnecessary work.
          */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index a16c873b3232..714c517dd4b7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2276,30 +2276,24 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
 
         for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) {
                 struct sev_data_snp_launch_update fw_args = {0};
-                bool assigned;
+                bool assigned = false;
                 int level;
 
-                if (!kvm_mem_is_private(kvm, gfn)) {
-                        pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
-                                 __func__, gfn);
-                        ret = -EINVAL;
-                        goto err;
-                }
-
                 ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
                 if (ret || assigned) {
                         pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
                                  __func__, gfn, ret, assigned);
-                        ret = -EINVAL;
+                        ret = ret ? -EINVAL : -EEXIST;
                         goto err;
                 }
 
                 if (src) {
                         void *vaddr = kmap_local_pfn(pfn + i);
 
-                        ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE);
-                        if (ret)
+                        if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) {
+                                ret = -EFAULT;
                                 goto err;
+                        }
 
                         kunmap_local(vaddr);
                 }
@@ -2549,6 +2543,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
         data->gctx_paddr = __psp_pa(sev->snp_context);
         ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
+        /*
+         * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
+         * can be given to the guest simply by marking the RMP entry as private.
+         * This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
+         */
+        if (!ret)
+                kvm->arch.pre_fault_allowed = true;
+
         kfree(id_auth);
 
 e_free_id_block:
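Taken together with the pre_fault_allowed checks added in mmu.c, svm.c, and x86.c, the snp_launch_finish() change gives SNP userspace a hard ordering requirement: KVM_PRE_FAULT_MEMORY is refused with EOPNOTSUPP until the launch flow completes. A hedged userspace-side sketch of that interaction (the helper is hypothetical, error handling is omitted, and it assumes kernel headers that define KVM_PRE_FAULT_MEMORY and struct kvm_pre_fault_memory):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: pre-fault a GPA range on one vCPU.  For an SNP guest
 * this only succeeds after KVM_SEV_SNP_LAUNCH_FINISH; before that the ioctl
 * fails with errno == EOPNOTSUPP because pre_fault_allowed is still false. */
static int prefault_range(int vcpu_fd, __u64 gpa, __u64 size)
{
        struct kvm_pre_fault_memory range = {
                .gpa  = gpa,
                .size = size,
        };

        return ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range);
}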
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c115d26844f7..5ab2c92c7331 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2876,6 +2876,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         case MSR_CSTAR:
                 msr_info->data = svm->vmcb01.ptr->save.cstar;
                 break;
+        case MSR_GS_BASE:
+                msr_info->data = svm->vmcb01.ptr->save.gs.base;
+                break;
+        case MSR_FS_BASE:
+                msr_info->data = svm->vmcb01.ptr->save.fs.base;
+                break;
         case MSR_KERNEL_GS_BASE:
                 msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base;
                 break;
@@ -3101,6 +3107,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
         case MSR_CSTAR:
                 svm->vmcb01.ptr->save.cstar = data;
                 break;
+        case MSR_GS_BASE:
+                svm->vmcb01.ptr->save.gs.base = data;
+                break;
+        case MSR_FS_BASE:
+                svm->vmcb01.ptr->save.fs.base = data;
+                break;
         case MSR_KERNEL_GS_BASE:
                 svm->vmcb01.ptr->save.kernel_gs_base = data;
                 break;
@@ -4949,6 +4961,7 @@ static int svm_vm_init(struct kvm *kvm)
                 to_kvm_sev_info(kvm)->need_init = true;
 
                 kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
+                kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
         }
 
         if (!pause_filter_count || !pause_filter_thresh)
@@ -5223,6 +5236,9 @@ static __init void svm_set_cpu_caps(void)
 
         /* CPUID 0x8000001F (SME/SEV features) */
         sev_set_cpu_caps();
+
+        /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
+        kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
 }
 
 static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f18c2d8c7476..733a0c45d1a6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7659,13 +7659,11 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 
         /*
          * Force WB and ignore guest PAT if the VM does NOT have a non-coherent
-         * device attached and the CPU doesn't support self-snoop. Letting the
-         * guest control memory types on Intel CPUs without self-snoop may
-         * result in unexpected behavior, and so KVM's (historical) ABI is to
-         * trust the guest to behave only as a last resort.
+         * device attached. Letting the guest control memory types on Intel
+         * CPUs may result in unexpected behavior, and so KVM's ABI is to trust
+         * the guest to behave only as a last resort.
          */
-        if (!static_cpu_has(X86_FEATURE_SELFSNOOP) &&
-            !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+        if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
                 return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
 
         return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT);
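With the self-snoop escape hatch removed on both the MMU and VMX sides, the EPT memory type for a given access now depends only on whether the access is MMIO and whether a non-coherent DMA device is attached. A compact kernel-side paraphrase of the resulting vmx_get_mt_mask() decision (a sketch of the logic, not the verbatim function; the MMIO case sits in unchanged code above the hunk):

/* Paraphrase of the post-patch decision in vmx_get_mt_mask(). */
static u8 ept_memtype(bool is_mmio, bool has_noncoherent_dma)
{
        if (is_mmio)                  /* MMIO is always mapped UC */
                return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;

        if (!has_noncoherent_dma)     /* no non-coherent DMA: force WB, ignore guest PAT */
                return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;

        /* Non-coherent DMA present: WB without the "ignore PAT" bit, so the
         * guest's PAT is honored and DMA'd memory isn't cached against its wishes. */
        return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
}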
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af6c8cf6a37a..c983c8e434b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void)
 
 int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 {
-        unsigned int cpu = smp_processor_id();
-        struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+        struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
         int err;
 
         value = (value & mask) | (msrs->values[slot].host & ~mask);
@@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr);
 
 static void drop_user_return_notifiers(void)
 {
-        unsigned int cpu = smp_processor_id();
-        struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
+        struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs);
 
         if (msrs->registered)
                 kvm_on_user_return(&msrs->urn);
@@ -4658,7 +4656,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
         case KVM_CAP_ASYNC_PF_INT:
         case KVM_CAP_GET_TSC_KHZ:
         case KVM_CAP_KVMCLOCK_CTRL:
-        case KVM_CAP_READONLY_MEM:
         case KVM_CAP_IOAPIC_POLARITY_IGNORED:
         case KVM_CAP_TSC_DEADLINE_TIMER:
         case KVM_CAP_DISABLE_QUIRKS:
@@ -4817,6 +4814,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
         case KVM_CAP_VM_TYPES:
                 r = kvm_caps.supported_vm_types;
                 break;
+        case KVM_CAP_READONLY_MEM:
+                r = kvm ? kvm_arch_has_readonly_mem(kvm) : 1;
+                break;
         default:
                 break;
         }
@@ -6042,7 +6042,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                 if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events)))
                         break;
 
+                kvm_vcpu_srcu_read_lock(vcpu);
                 r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events);
+                kvm_vcpu_srcu_read_unlock(vcpu);
                 break;
         }
         case KVM_GET_DEBUGREGS: {
@@ -12646,6 +12648,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
         kvm->arch.vm_type = type;
         kvm->arch.has_private_mem =
                 (type == KVM_X86_SW_PROTECTED_VM);
+        /* Decided by the vendor code for other VM types. */
+        kvm->arch.pre_fault_allowed =
+                type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
 
         ret = kvm_page_track_init(kvm);
         if (ret)
@@ -13641,19 +13646,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
-{
-        return kvm->arch.vm_type == KVM_X86_SNP_VM;
-}
-
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 {
         return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);
 }
 #endif
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
 {
         kvm_x86_call(gmem_invalidate)(start, end);
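One user-visible consequence of the x86.c changes: KVM_CAP_READONLY_MEM is no longer reported unconditionally. It is now answered per VM via kvm_arch_has_readonly_mem(), so certain VM types (e.g. SEV-SNP) report 0, while a query on /dev/kvm still returns 1 for compatibility (the "r = kvm ? ... : 1" branch above). A hypothetical userspace check, relying on the fact that KVM_CHECK_EXTENSION can be issued on a VM file descriptor:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hypothetical helper: returns nonzero if this VM supports read-only
 * memslots (KVM_MEM_READONLY).  Query the VM fd, not /dev/kvm, since the
 * answer now depends on the VM type. */
static int vm_has_readonly_mem(int vm_fd)
{
        return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_READONLY_MEM) > 0;
}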