Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Address some fallout of the locking rework, this time affecting the
     way the vgic is configured

   - Fix an issue where the page table walker frees a subtree and then
     proceeds with walking what it has just freed...

   - Check that a given PA donated to the guest is actually memory (only
     affecting pKVM)

   - Correctly handle MTE CMOs by Set/Way

   - Fix the reported address of a watchpoint forwarded to userspace

   - Fix the freeing of the root of stage-2 page tables

   - Stop creating spurious PMU events to perform detection of the
     default PMU and use the existing PMU list instead

  x86:

   - Fix a memslot lookup bug in the NX recovery thread that could
     theoretically let userspace bypass the NX hugepage mitigation

   - Fix a s/BLOCKING/PENDING bug in SVM's vNMI support

   - Account exit stats for fastpath VM-Exits that never leave the super
     tight run-loop

   - Fix an out-of-bounds bug in the optimized APIC map code, and add a
     regression test for the race"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: selftests: Add test for race in kvm_recalculate_apic_map()
  KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds
  KVM: x86: Account fastpath-only VM-Exits in vCPU stats
  KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK
  KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker
  KVM: arm64: Document default vPMU behavior on heterogeneous systems
  KVM: arm64: Iterate arm_pmus list to probe for default PMU
  KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()
  KVM: arm64: Populate fault info for watchpoint
  KVM: arm64: Reload PTE after invoking walker callback on preorder traversal
  KVM: arm64: Handle trap of tagged Set/Way CMOs
  arm64: Add missing Set/Way CMO encodings
  KVM: arm64: Prevent unconditional donation of unmapped regions from the host
  KVM: arm64: vgic: Fix a comment
  KVM: arm64: vgic: Fix locking comment
  KVM: arm64: vgic: Wrap vgic_its_create() with config_lock
  KVM: arm64: vgic: Fix a circular locking issue
commit b066935bf8
Linus Torvalds, 2023-06-04 07:16:53 -04:00
23 changed files with 248 additions and 95 deletions

arch/arm64/include/asm/kvm_pgtable.h

@ -632,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
*
* The walker will walk the page-table entries corresponding to the input
* address range specified, visiting entries according to the walker flags.
* Invalid entries are treated as leaf entries. Leaf entries are reloaded
* after invoking the walker callback, allowing the walker to descend into
* a newly installed table.
* Invalid entries are treated as leaf entries. The visited page table entry is
* reloaded after invoking the walker callback, allowing the walker to descend
* into a newly installed table.
*
* Returning a negative error code from the walker callback function will
* terminate the walk immediately with the same error code.

arch/arm64/include/asm/sysreg.h

@ -115,8 +115,14 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
/*
* Automatically generated definitions for system registers, the

arch/arm64/kvm/hyp/include/hyp/switch.h

@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;
return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {

arch/arm64/kvm/hyp/nvhe/mem_protect.c

@ -575,7 +575,7 @@ struct pkvm_mem_donation {
struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};
static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
@ -583,10 +583,7 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
{
struct check_walk_data *d = ctx->arg;
if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
return -EINVAL;
return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}
static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
{
if (!addr_is_allowed_memory(addr))
return PKVM_NOPAGE;
if (!kvm_pte_valid(pte) && pte)
return PKVM_NOPAGE;
@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx
return host_stage2_set_owner_locked(addr, size, host_id);
}
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;

arch/arm64/kvm/hyp/nvhe/switch.c

@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};
@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};

arch/arm64/kvm/hyp/pgtable.c

@ -209,14 +209,26 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
.flags = flags,
};
int ret = 0;
bool reload = false;
kvm_pteref_t childp;
bool table = kvm_pte_table(ctx.old, level);
if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
reload = true;
}
if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
reload = true;
}
/*
* Reload the page table after invoking the walker callback for leaf
* entries or after pre-order traversal, to allow the walker to descend
* into a newly installed or replaced table.
*/
if (reload) {
ctx.old = READ_ONCE(*ptep);
table = kvm_pte_table(ctx.old, level);
}
@ -1320,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
};
WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
WARN_ON(mm_ops->page_count(pgtable) != 1);
mm_ops->put_page(pgtable);
}
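
The reload logic above is easier to see outside its kernel context. Below is a minimal, self-contained user-space sketch of the same pattern (toy types and an invented install_table() callback, not the kernel's API): the walker re-reads the entry after invoking the visitor, so a callback that installs a table is actually descended into instead of the walker acting on a stale copy of the old entry.

#include <stdio.h>
#include <stdint.h>

/* Toy "page table entry": bit 0 set means the entry points to a child table. */
#define PTE_TABLE 0x1u

typedef int (*visitor_fn)(uint64_t *ptep);

/* Invented leaf callback that replaces the entry with a table entry. */
static int install_table(uint64_t *ptep)
{
	*ptep = 0x1000u | PTE_TABLE;	/* pretend 0x1000 is a freshly allocated child table */
	return 0;
}

static void visit(uint64_t *ptep, visitor_fn cb)
{
	uint64_t old = *ptep;
	int table = old & PTE_TABLE;
	int reload = 0;

	if (!table) {
		cb(ptep);		/* the leaf visitor may have installed a table... */
		reload = 1;
	}

	/* ...so re-read the entry before deciding whether to descend. */
	if (reload) {
		old = *ptep;
		table = old & PTE_TABLE;
	}

	if (table)
		printf("descend into table at 0x%llx\n",
		       (unsigned long long)(old & ~(uint64_t)PTE_TABLE));
	else
		printf("stop at leaf 0x%llx\n", (unsigned long long)old);
}

int main(void)
{
	uint64_t pte = 0;	/* invalid entry, treated as a leaf */

	visit(&pte, install_table);
	return 0;
}

Without the re-read, the walker keeps working from the pre-callback snapshot of the entry, which is the walk-what-was-just-freed/replaced problem the pull message describes.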

arch/arm64/kvm/hyp/vhe/switch.c

@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};

arch/arm64/kvm/pmu-emul.c

@ -694,45 +694,23 @@ out_unlock:
static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu = NULL;
struct arm_pmu *tmp, *pmu = NULL;
struct arm_pmu_entry *entry;
int cpu;
/*
* Create a dummy event that only counts user cycles. As we'll never
* leave this function with the event being live, it will never
* count anything. But it allows us to probe some of the PMU
* details. Yes, this is terrible.
*/
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = 0;
attr.exclude_user = 0;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_host = 1;
attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
attr.sample_period = GENMASK(63, 0);
mutex_lock(&arm_pmus_lock);
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, &attr);
cpu = smp_processor_id();
list_for_each_entry(entry, &arm_pmus, entry) {
tmp = entry->arm_pmu;
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return NULL;
if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
pmu = tmp;
break;
}
}
if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
pmu = NULL;
}
perf_event_disable(event);
perf_event_release_kernel(event);
mutex_unlock(&arm_pmus_lock);
return pmu;
}
@ -912,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EBUSY;
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
/*
* No PMU set, get the default one.
*
* The observant among you will notice that the supported_cpus
* mask does not get updated for the default PMU even though it
* is quite possible the selected instance supports only a
* subset of cores in the system. This is intentional, and
* upholds the preexisting behavior on heterogeneous systems
* where vCPUs can be scheduled on any core but the guest
* counters could stop working.
*/
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
if (!kvm->arch.arm_pmu)
return -ENODEV;
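
To make the probe rework above concrete outside the kernel, here is a small user-space sketch of the same idea, with invented toy_pmu/probe_default_pmu names standing in for the arm_pmus list walk: rather than creating a throwaway counter just to discover which PMU drives the current CPU, look it up in the already-registered list under its lock.

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

/* Toy registry standing in for the list of registered PMU instances. */
struct toy_pmu {
	const char *name;
	cpu_set_t supported_cpus;
	struct toy_pmu *next;
};

static pthread_mutex_t pmus_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_pmu *pmus;

/* Pick the registered instance that supports the CPU we are running on. */
static struct toy_pmu *probe_default_pmu(void)
{
	struct toy_pmu *p, *found = NULL;
	int cpu = sched_getcpu();

	if (cpu < 0)
		return NULL;

	pthread_mutex_lock(&pmus_lock);
	for (p = pmus; p; p = p->next) {
		if (CPU_ISSET(cpu, &p->supported_cpus)) {
			found = p;
			break;
		}
	}
	pthread_mutex_unlock(&pmus_lock);
	return found;
}

int main(void)
{
	struct toy_pmu big = { .name = "pmu-big" };
	struct toy_pmu little = { .name = "pmu-little", .next = &big };
	struct toy_pmu *pmu;
	int i;

	/* Pretend the "little" PMU covers every CPU and the "big" one none. */
	for (i = 0; i < CPU_SETSIZE; i++)
		CPU_SET(i, &little.supported_cpus);

	pmus = &little;
	pmu = probe_default_pmu();
	printf("default PMU: %s\n", pmu ? pmu->name : "none");
	return 0;
}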

arch/arm64/kvm/sys_regs.c

@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
return true;
}
static bool access_dcgsw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_mte(vcpu->kvm)) {
kvm_inject_undefined(vcpu);
return false;
}
/* Treat MTE S/W ops as we treat the classic ones: with contempt */
return access_dcsw(vcpu, p, r);
}
static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
{
switch (r->aarch32_map) {
@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CISW), access_dcsw },
{ SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
DBG_BCR_BVR_WCR_WVR_EL1(0),
DBG_BCR_BVR_WCR_WVR_EL1(1),

arch/arm64/kvm/vgic/vgic-init.c

@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
mutex_lock(&vcpu->kvm->arch.config_lock);
mutex_lock(&vcpu->kvm->slots_lock);
ret = vgic_register_redist_iodev(vcpu);
mutex_unlock(&vcpu->kvm->arch.config_lock);
mutex_unlock(&vcpu->kvm->slots_lock);
}
return ret;
}
@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
* is a GICv2. A GICv3 must be explicitly initialized by the guest using the
* is a GICv2. A GICv3 must be explicitly initialized by userspace using the
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
* @kvm: kvm struct pointer
*/
@ -446,11 +446,13 @@ int vgic_lazy_init(struct kvm *kvm)
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
gpa_t dist_base;
int ret = 0;
if (likely(vgic_ready(kvm)))
return 0;
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
@ -463,13 +465,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
else
ret = vgic_v3_map_resources(kvm);
if (ret)
if (ret) {
__kvm_vgic_destroy(kvm);
else
dist->ready = true;
goto out;
}
dist->ready = true;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);
ret = vgic_register_dist_iodev(kvm, dist_base,
kvm_vgic_global_state.type);
if (ret) {
kvm_err("Unable to register VGIC dist MMIO regions\n");
kvm_vgic_destroy(kvm);
}
mutex_unlock(&kvm->slots_lock);
return ret;
out:
mutex_unlock(&kvm->arch.config_lock);
mutex_unlock(&kvm->slots_lock);
return ret;
}

arch/arm64/kvm/vgic/vgic-its.c

@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
static int vgic_its_create(struct kvm_device *dev, u32 type)
{
int ret;
struct vgic_its *its;
if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (!its)
return -ENOMEM;
mutex_lock(&dev->kvm->arch.config_lock);
if (vgic_initialized(dev->kvm)) {
int ret = vgic_v4_init(dev->kvm);
ret = vgic_v4_init(dev->kvm);
if (ret < 0) {
mutex_unlock(&dev->kvm->arch.config_lock);
kfree(its);
return ret;
}
@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
/* Yep, even more trickery for lock ordering... */
#ifdef CONFIG_LOCKDEP
mutex_lock(&dev->kvm->arch.config_lock);
mutex_lock(&its->cmd_lock);
mutex_lock(&its->its_lock);
mutex_unlock(&its->its_lock);
mutex_unlock(&its->cmd_lock);
mutex_unlock(&dev->kvm->arch.config_lock);
#endif
its->vgic_its_base = VGIC_ADDR_UNDEF;
@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
dev->private = its;
return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1);
mutex_unlock(&dev->kvm->arch.config_lock);
return ret;
}
static void vgic_its_destroy(struct kvm_device *kvm_dev)

arch/arm64/kvm/vgic/vgic-kvm-device.c

@ -102,7 +102,11 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (get_user(addr, uaddr))
return -EFAULT;
mutex_lock(&kvm->arch.config_lock);
/*
* Since we can't hold config_lock while registering the redistributor
* iodevs, take the slots_lock immediately.
*/
mutex_lock(&kvm->slots_lock);
switch (attr->attr) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
@ -182,6 +186,7 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
if (r)
goto out;
mutex_lock(&kvm->arch.config_lock);
if (write) {
r = vgic_check_iorange(kvm, *addr_ptr, addr, alignment, size);
if (!r)
@ -189,9 +194,10 @@ static int kvm_vgic_addr(struct kvm *kvm, struct kvm_device_attr *attr, bool wri
} else {
addr = *addr_ptr;
}
mutex_unlock(&kvm->arch.config_lock);
out:
mutex_unlock(&kvm->arch.config_lock);
mutex_unlock(&kvm->slots_lock);
if (!r && !write)
r = put_user(addr, uaddr);
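
The vgic changes above all enforce a single lock order, which is easier to see compressed into a user-space sketch with POSIX mutexes (toy names, not the kernel's kvm->slots_lock / arch.config_lock): the outer lock is always taken first, and the inner lock is dropped before doing work that only needs the outer one.

#include <pthread.h>
#include <stdio.h>

/* Toy stand-ins for the two locks; the real ones live in struct kvm. */
static pthread_mutex_t outer_slots_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner_config_lock = PTHREAD_MUTEX_INITIALIZER;

/* Pretend this is io-device registration: it needs only the outer lock. */
static void register_iodev(void)
{
	puts("register MMIO region (outer lock held, inner lock NOT held)");
}

static void configure_and_register(void)
{
	pthread_mutex_lock(&outer_slots_lock);	/* outer lock, always first */
	pthread_mutex_lock(&inner_config_lock);

	puts("update configuration (both locks held)");

	/*
	 * Drop the inner lock before registering; doing the registration
	 * with the inner lock held is the kind of inversion that produces
	 * the circular locking dependency this series untangles.
	 */
	pthread_mutex_unlock(&inner_config_lock);
	register_iodev();

	pthread_mutex_unlock(&outer_slots_lock);
}

int main(void)
{
	configure_and_register();
	return 0;
}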

arch/arm64/kvm/vgic/vgic-mmio-v3.c

@ -769,10 +769,13 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
struct vgic_redist_region *rdreg;
gpa_t rd_base;
int ret;
int ret = 0;
lockdep_assert_held(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
return 0;
goto out_unlock;
/*
* We may be creating VCPUs before having set the base address for the
@ -782,10 +785,12 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
*/
rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions);
if (!rdreg)
return 0;
goto out_unlock;
if (!vgic_v3_check_base(kvm))
return -EINVAL;
if (!vgic_v3_check_base(kvm)) {
ret = -EINVAL;
goto out_unlock;
}
vgic_cpu->rdreg = rdreg;
vgic_cpu->rdreg_index = rdreg->free_index;
@ -799,16 +804,20 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
rd_dev->redist_vcpu = vcpu;
mutex_lock(&kvm->slots_lock);
mutex_unlock(&kvm->arch.config_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
2 * SZ_64K, &rd_dev->dev);
mutex_unlock(&kvm->slots_lock);
if (ret)
return ret;
/* Protected by slots_lock */
rdreg->free_index++;
return 0;
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
@ -834,12 +843,10 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
/* The current c failed, so iterate over the previous ones. */
int i;
mutex_lock(&kvm->slots_lock);
for (i = 0; i < c; i++) {
vcpu = kvm_get_vcpu(kvm, i);
vgic_unregister_redist_iodev(vcpu);
}
mutex_unlock(&kvm->slots_lock);
}
return ret;
@ -938,7 +945,9 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
{
int ret;
mutex_lock(&kvm->arch.config_lock);
ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
mutex_unlock(&kvm->arch.config_lock);
if (ret)
return ret;
@ -950,8 +959,10 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
if (ret) {
struct vgic_redist_region *rdreg;
mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
vgic_v3_free_redist_region(rdreg);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}

arch/arm64/kvm/vgic/vgic-mmio.c

@ -1096,7 +1096,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type type)
{
struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
int ret = 0;
unsigned int len;
switch (type) {
@ -1114,10 +1113,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
io_device->iodev_type = IODEV_DIST;
io_device->redist_vcpu = NULL;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
len, &io_device->dev);
mutex_unlock(&kvm->slots_lock);
return ret;
return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
len, &io_device->dev);
}

arch/arm64/kvm/vgic/vgic-v2.c

@ -312,12 +312,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
return ret;
}
ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
if (ret) {
kvm_err("Unable to register VGIC MMIO regions\n");
return ret;
}
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
kvm_vgic_global_state.vcpu_base,

arch/arm64/kvm/vgic/vgic-v3.c

@ -539,7 +539,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int ret = 0;
unsigned long c;
kvm_for_each_vcpu(c, vcpu, kvm) {
@ -569,12 +568,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
return -EBUSY;
}
ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
if (ret) {
kvm_err("Unable to register VGICv3 dist MMIO regions\n");
return ret;
}
if (kvm_vgic_global_state.has_gicv4_1)
vgic_v4_configure_vsgis(kvm);

arch/arm64/kvm/vgic/vgic-v4.c

@ -184,13 +184,14 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
}
}
/* Must be called with the kvm lock held */
void vgic_v4_configure_vsgis(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
unsigned long i;
lockdep_assert_held(&kvm->arch.config_lock);
kvm_arm_halt_guest(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {

arch/x86/kvm/lapic.c

@ -228,6 +228,23 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
u32 xapic_id = kvm_xapic_id(apic);
u32 physical_id;
/*
* For simplicity, KVM always allocates enough space for all possible
* xAPIC IDs. Yell, but don't kill the VM, as KVM can continue on
* without the optimized map.
*/
if (WARN_ON_ONCE(xapic_id > new->max_apic_id))
return -EINVAL;
/*
* Bail if a vCPU was added and/or enabled its APIC between allocating
* the map and doing the actual calculations for the map. Note, KVM
* hardcodes the x2APIC ID to vcpu_id, i.e. there's no TOCTOU bug if
* the compiler decides to reload x2apic_id after this check.
*/
if (x2apic_id > new->max_apic_id)
return -E2BIG;
/*
* Deliberately truncate the vCPU ID when detecting a mismatched APIC
* ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
@ -253,8 +270,7 @@ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
*/
if (vcpu->kvm->arch.x2apic_format) {
/* See also kvm_apic_match_physical_addr(). */
if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
x2apic_id <= new->max_apic_id)
if (apic_x2apic_mode(apic) || x2apic_id > 0xff)
new->phys_map[x2apic_id] = apic;
if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
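
The two checks above boil down to "never index the snapshot-sized map with an ID read after the snapshot was taken". Below is a small user-space sketch of that guard, using an invented apic_map/map_insert pair rather than KVM's structures.

#include <stdio.h>
#include <stdlib.h>

/* Toy "optimized map", sized from a snapshot of the highest APIC ID. */
struct apic_map {
	unsigned int max_apic_id;
	void *phys_map[];	/* max_apic_id + 1 entries */
};

/* Returns 0 on success, -1 if the ID no longer fits the snapshot. */
static int map_insert(struct apic_map *map, unsigned int id, void *apic)
{
	/*
	 * The ID is re-read from per-vCPU state after the map was sized, so
	 * a vCPU added or switched to x2APIC in the meantime can exceed the
	 * snapshot.  Bail instead of writing past the end of the array and
	 * let the caller recompute with a fresh snapshot.
	 */
	if (id > map->max_apic_id)
		return -1;

	map->phys_map[id] = apic;
	return 0;
}

int main(void)
{
	unsigned int snapshot_max = 255;
	struct apic_map *map = calloc(1, sizeof(*map) +
				      (snapshot_max + 1) * sizeof(void *));
	int dummy;

	if (!map)
		return 1;
	map->max_apic_id = snapshot_max;

	printf("in range:  %d\n", map_insert(map, 255, &dummy));	/* 0 */
	printf("too large: %d\n", map_insert(map, 256, &dummy));	/* -1 */
	free(map);
	return 0;
}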

arch/x86/kvm/mmu/mmu.c

@ -7091,7 +7091,10 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
*/
slot = NULL;
if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
slot = gfn_to_memslot(kvm, sp->gfn);
struct kvm_memslots *slots;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, sp->gfn);
WARN_ON_ONCE(!slot);
}

arch/x86/kvm/svm/svm.c

@ -3510,7 +3510,7 @@ static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
if (!is_vnmi_enabled(svm))
return false;
return !!(svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK);
return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
}
static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)

arch/x86/kvm/x86.c

@ -10758,6 +10758,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
break;
}
/* Note, VM-Exits that go down the "slow" path are accounted below. */
++vcpu->stat.exits;
}
/*

tools/testing/selftests/kvm/Makefile

@ -116,6 +116,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test

tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c

@ -0,0 +1,74 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Test edge cases and race conditions in kvm_recalculate_apic_map().
*/
#include <sys/ioctl.h>
#include <pthread.h>
#include <time.h>
#include "processor.h"
#include "test_util.h"
#include "kvm_util.h"
#include "apic.h"
#define TIMEOUT 5 /* seconds */
#define LAPIC_DISABLED 0
#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
#define MAX_XAPIC_ID 0xff
static void *race(void *arg)
{
struct kvm_lapic_state lapic = {};
struct kvm_vcpu *vcpu = arg;
while (1) {
/* Trigger kvm_recalculate_apic_map(). */
vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
pthread_testcancel();
}
return NULL;
}
int main(void)
{
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
struct kvm_vcpu *vcpuN;
struct kvm_vm *vm;
pthread_t thread;
time_t t;
int i;
kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
/*
* Create the max number of vCPUs supported by selftests so that KVM
* has decent amount of work to do when recalculating the map, i.e. to
* make the problematic window large enough to hit.
*/
vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
/*
* Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
* due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
*/
for (i = 0; i < KVM_MAX_VCPUS; i++)
vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
vcpuN = vcpus[KVM_MAX_VCPUS - 1];
for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
}
ASSERT_EQ(pthread_cancel(thread), 0);
ASSERT_EQ(pthread_join(thread, NULL), 0);
kvm_vm_free(vm);
return 0;
}