Diffstat (limited to 'tools/testing/selftests/kvm/lib')
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic.c           |  66
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic_private.h   |  11
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic_v3.c        | 206
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/gic_v3.h        |  70
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/processor.c     |  82
-rw-r--r--  tools/testing/selftests/kvm/lib/aarch64/vgic.c          | 103
-rw-r--r--  tools/testing/selftests/kvm/lib/elf.c                   |   3
-rw-r--r--  tools/testing/selftests/kvm/lib/guest_modes.c           |  59
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c              | 177
-rw-r--r--  tools/testing/selftests/kvm/lib/perf_test_util.c        | 184
-rw-r--r--  tools/testing/selftests/kvm/lib/riscv/processor.c       | 362
-rw-r--r--  tools/testing/selftests/kvm/lib/riscv/ucall.c           |  87
-rw-r--r--  tools/testing/selftests/kvm/lib/test_util.c             |   5
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c      | 163
14 files changed, 1376 insertions, 202 deletions
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c index fff4fc27504d..55668631d546 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c @@ -93,3 +93,69 @@ void gic_set_eoi(unsigned int intid) GUEST_ASSERT(gic_common_ops); gic_common_ops->gic_write_eoir(intid); } + +void gic_set_dir(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_write_dir(intid); +} + +void gic_set_eoi_split(bool split) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_eoi_split(split); +} + +void gic_set_priority_mask(uint64_t pmr) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority_mask(pmr); +} + +void gic_set_priority(unsigned int intid, unsigned int prio) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_set_priority(intid, prio); +} + +void gic_irq_set_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_active(intid); +} + +void gic_irq_clear_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_active(intid); +} + +bool gic_irq_get_active(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_active(intid); +} + +void gic_irq_set_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_pending(intid); +} + +void gic_irq_clear_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_clear_pending(intid); +} + +bool gic_irq_get_pending(unsigned int intid) +{ + GUEST_ASSERT(gic_common_ops); + return gic_common_ops->gic_irq_get_pending(intid); +} + +void gic_irq_set_config(unsigned int intid, bool is_edge) +{ + GUEST_ASSERT(gic_common_ops); + gic_common_ops->gic_irq_set_config(intid, is_edge); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h index d81d739433dc..75d07313c893 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h @@ -14,6 +14,17 @@ struct gic_common_ops { void (*gic_irq_disable)(unsigned int intid); uint64_t (*gic_read_iar)(void); void (*gic_write_eoir)(uint32_t irq); + void (*gic_write_dir)(uint32_t irq); + void (*gic_set_eoi_split)(bool split); + void (*gic_set_priority_mask)(uint64_t mask); + void (*gic_set_priority)(uint32_t intid, uint32_t prio); + void (*gic_irq_set_active)(uint32_t intid); + void (*gic_irq_clear_active)(uint32_t intid); + bool (*gic_irq_get_active)(uint32_t intid); + void (*gic_irq_set_pending)(uint32_t intid); + void (*gic_irq_clear_pending)(uint32_t intid); + bool (*gic_irq_get_pending)(uint32_t intid); + void (*gic_irq_set_config)(uint32_t intid, bool is_edge); }; extern const struct gic_common_ops gicv3_ops; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 2dbf3339b62e..00f613c0583c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -19,7 +19,8 @@ struct gicv3_data { unsigned int nr_spis; }; -#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K) +#define DIST_BIT (1U << 31) enum gicv3_intid_range { SGI_RANGE, @@ -50,6 +51,14 @@ static void gicv3_gicr_wait_for_rwp(void *redist_base) } } +static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) +{ + if (cpu_or_dist & 
DIST_BIT) + gicv3_gicd_wait_for_rwp(); + else + gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]); +} + static enum gicv3_intid_range get_intid_range(unsigned int intid) { switch (intid) { @@ -81,39 +90,175 @@ static void gicv3_write_eoir(uint32_t irq) isb(); } -static void -gicv3_config_irq(unsigned int intid, unsigned int offset) +static void gicv3_write_dir(uint32_t irq) +{ + write_sysreg_s(irq, SYS_ICC_DIR_EL1); + isb(); +} + +static void gicv3_set_priority_mask(uint64_t mask) +{ + write_sysreg_s(mask, SYS_ICC_PMR_EL1); +} + +static void gicv3_set_eoi_split(bool split) +{ + uint32_t val; + + /* All other fields are read-only, so no need to read CTLR first. In + * fact, the kernel does the same. + */ + val = split ? (1U << 1) : 0; + write_sysreg_s(val, SYS_ICC_CTLR_EL1); + isb(); +} + +uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) +{ + void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base + : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + return readl(base + offset); +} + +void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) +{ + void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base + : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + writel(reg_val, base + offset); +} + +uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask) +{ + return gicv3_reg_readl(cpu_or_dist, offset) & mask; +} + +void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset, + uint32_t mask, uint32_t reg_val) +{ + uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask; + + tmp |= (reg_val & mask); + gicv3_reg_writel(cpu_or_dist, offset, tmp); +} + +/* + * We use a single offset for the distributor and redistributor maps as they + * have the same value in both. The only exceptions are registers that only + * exist in one and not the other, like GICR_WAKER that doesn't exist in the + * distributor map. Such registers are conveniently marked as reserved in the + * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being + * marked as "Reserved" in the Distributor map. + */ +static void gicv3_access_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, + bool write, uint32_t *val) { uint32_t cpu = guest_get_vcpuid(); - uint32_t mask = 1 << (intid % 32); enum gicv3_intid_range intid_range = get_intid_range(intid); - void *reg; - - /* We care about 'cpu' only for SGIs or PPIs */ - if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) { - GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - - reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) + - offset; - writel(mask, reg); - gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]); - } else if (intid_range == SPI_RANGE) { - reg = gicv3_data.dist_base + offset + (intid / 32) * 4; - writel(mask, reg); - gicv3_gicd_wait_for_rwp(); - } else { - GUEST_ASSERT(0); - } + uint32_t fields_per_reg, index, mask, shift; + uint32_t cpu_or_dist; + + GUEST_ASSERT(bits_per_field <= reg_bits); + GUEST_ASSERT(*val < (1U << bits_per_field)); + /* Some registers like IROUTER are 64 bit long. Those are currently not + * supported by readl nor writel, so just asserting here until then. + */ + GUEST_ASSERT(reg_bits == 32); + + fields_per_reg = reg_bits / bits_per_field; + index = intid % fields_per_reg; + shift = index * bits_per_field; + mask = ((1U << bits_per_field) - 1) << shift; + + /* Set offset to the actual register holding intid's config. 
*/ + offset += (intid / fields_per_reg) * (reg_bits / 8); + + cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu; + + if (write) + gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift); + *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift; +} + +static void gicv3_write_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field, uint32_t val) +{ + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, true, &val); +} + +static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset, + uint32_t reg_bits, uint32_t bits_per_field) +{ + uint32_t val; + + gicv3_access_reg(intid, offset, reg_bits, + bits_per_field, false, &val); + return val; +} + +static void gicv3_set_priority(uint32_t intid, uint32_t prio) +{ + gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio); +} + +/* Sets the intid to be level-sensitive or edge-triggered. */ +static void gicv3_irq_set_config(uint32_t intid, bool is_edge) +{ + uint32_t val; + + /* N/A for private interrupts. */ + GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE); + val = is_edge ? 2 : 0; + gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val); +} + +static void gicv3_irq_enable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_disable(uint32_t intid) +{ + bool is_spi = get_intid_range(intid) == SPI_RANGE; + uint32_t cpu = guest_get_vcpuid(); + + gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1); + gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu); +} + +static void gicv3_irq_set_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1); +} + +static void gicv3_irq_clear_active(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1); +} + +static bool gicv3_irq_get_active(uint32_t intid) +{ + return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1); +} + +static void gicv3_irq_set_pending(uint32_t intid) +{ + gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1); } -static void gicv3_irq_enable(unsigned int intid) +static void gicv3_irq_clear_pending(uint32_t intid) { - gicv3_config_irq(intid, GICD_ISENABLER); + gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1); } -static void gicv3_irq_disable(unsigned int intid) +static bool gicv3_irq_get_pending(uint32_t intid) { - gicv3_config_irq(intid, GICD_ICENABLER); + return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } static void gicv3_enable_redist(void *redist_base) @@ -237,4 +382,15 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_disable = gicv3_irq_disable, .gic_read_iar = gicv3_read_iar, .gic_write_eoir = gicv3_write_eoir, + .gic_write_dir = gicv3_write_dir, + .gic_set_priority_mask = gicv3_set_priority_mask, + .gic_set_eoi_split = gicv3_set_eoi_split, + .gic_set_priority = gicv3_set_priority, + .gic_irq_set_active = gicv3_irq_set_active, + .gic_irq_clear_active = gicv3_irq_clear_active, + .gic_irq_get_active = gicv3_irq_get_active, + .gic_irq_set_pending = gicv3_irq_set_pending, + .gic_irq_clear_pending = gicv3_irq_clear_pending, + .gic_irq_get_pending = gicv3_irq_get_pending, + .gic_irq_set_config = gicv3_irq_set_config, }; diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h deleted file mode 100644 index b51536d469a6..000000000000 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h +++ /dev/null @@ -1,70 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * ARM Generic Interrupt Controller (GIC) 
v3 specific defines - */ - -#ifndef SELFTEST_KVM_GICV3_H -#define SELFTEST_KVM_GICV3_H - -#include <asm/sysreg.h> - -/* - * Distributor registers - */ -#define GICD_CTLR 0x0000 -#define GICD_TYPER 0x0004 -#define GICD_IGROUPR 0x0080 -#define GICD_ISENABLER 0x0100 -#define GICD_ICENABLER 0x0180 -#define GICD_ICACTIVER 0x0380 -#define GICD_IPRIORITYR 0x0400 - -/* - * The assumption is that the guest runs in a non-secure mode. - * The following bits of GICD_CTLR are defined accordingly. - */ -#define GICD_CTLR_RWP (1U << 31) -#define GICD_CTLR_nASSGIreq (1U << 8) -#define GICD_CTLR_ARE_NS (1U << 4) -#define GICD_CTLR_ENABLE_G1A (1U << 1) -#define GICD_CTLR_ENABLE_G1 (1U << 0) - -#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0 - -/* - * Redistributor registers - */ -#define GICR_CTLR 0x000 -#define GICR_WAKER 0x014 - -#define GICR_CTLR_RWP (1U << 3) - -#define GICR_WAKER_ProcessorSleep (1U << 1) -#define GICR_WAKER_ChildrenAsleep (1U << 2) - -/* - * Redistributor registers, offsets from SGI base - */ -#define GICR_IGROUPR0 GICD_IGROUPR -#define GICR_ISENABLER0 GICD_ISENABLER -#define GICR_ICENABLER0 GICD_ICENABLER -#define GICR_ICACTIVER0 GICD_ICACTIVER -#define GICR_IPRIORITYR0 GICD_IPRIORITYR - -/* CPU interface registers */ -#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) - -#define ICC_PMR_DEF_PRIO 0xf0 - -#define ICC_SRE_EL1_SRE (1U << 0) - -#define ICC_IGRPEN1_EL1_ENABLE (1U << 0) - -#define GICV3_MAX_CPUS 512 - -#endif /* SELFTEST_KVM_GICV3_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index b4eeeafd2a70..9343d82519b4 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -8,6 +8,7 @@ #include <linux/compiler.h> #include <assert.h> +#include "guest_modes.h" #include "kvm_util.h" #include "../kvm_util_internal.h" #include "processor.h" @@ -237,6 +238,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + /* Configure base granule size */ switch (vm->mode) { case VM_MODE_P52V48_4K: TEST_FAIL("AArch64 does not support 4K sized pages " @@ -245,25 +247,47 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: + case VM_MODE_P48V48_64K: + case VM_MODE_P40V48_64K: + case VM_MODE_P36V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + case VM_MODE_P48V48_16K: + case VM_MODE_P40V48_16K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ break; case VM_MODE_P48V48_4K: + case VM_MODE_P40V48_4K: + case VM_MODE_P36V48_4K: tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + /* Configure output size */ + switch (vm->mode) { + case VM_MODE_P52V48_64K: + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + case VM_MODE_P48V48_4K: + case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 
1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ break; case VM_MODE_P40V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ - break; + case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ break; + case VM_MODE_P36V48_4K: + case VM_MODE_P36V48_16K: + case VM_MODE_P36V48_64K: + case VM_MODE_P36V47_16K: + tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } @@ -432,3 +456,47 @@ uint32_t guest_get_vcpuid(void) { return read_sysreg(tpidr_el1); } + +void aarch64_get_supported_page_sizes(uint32_t ipa, + bool *ps4k, bool *ps16k, bool *ps64k) +{ + struct kvm_vcpu_init preferred_init; + int kvm_fd, vm_fd, vcpu_fd, err; + uint64_t val; + struct kvm_one_reg reg = { + .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), + .addr = (uint64_t)&val, + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa); + TEST_ASSERT(vm_fd >= 0, "Can't create VM"); + + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu"); + + err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); + TEST_ASSERT(err == 0, "Can't get target"); + err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); + TEST_ASSERT(err == 0, "Can't get init vcpu"); + + err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); + TEST_ASSERT(err == 0, "Can't get MMFR0"); + + *ps4k = ((val >> 28) & 0xf) != 0xf; + *ps64k = ((val >> 24) & 0xf) == 0; + *ps16k = ((val >> 20) & 0xf) != 0; + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* + * arm64 doesn't have a true default mode, so start by computing the + * available IPA space and page sizes early. + */ +void __attribute__((constructor)) init_guest_modes(void) +{ + guest_modes_append_default(); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b9b271ff520d..b3a0fca0d780 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -5,11 +5,14 @@ #include <linux/kvm.h> #include <linux/sizes.h> +#include <asm/kvm_para.h> #include <asm/kvm.h> #include "kvm_util.h" #include "../kvm_util_internal.h" #include "vgic.h" +#include "gic.h" +#include "gic_v3.h" /* * vGIC-v3 default host setup @@ -28,7 +31,7 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. 
*/ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, uint64_t gicd_base_gpa, uint64_t gicr_base_gpa) { int gic_fd; @@ -50,6 +53,13 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, /* Distributor setup */ gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, + 0, &nr_irqs, true); + + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); @@ -68,3 +78,94 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, return gic_fd; } + +/* should only work for level sensitive interrupts */ +int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + uint64_t attr = 32 * (intid / 32); + uint64_t index = intid % 32; + uint64_t val; + int ret; + + ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val, false); + if (ret != 0) + return ret; + + val |= 1U << index; + ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val, true); + return ret; +} + +void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) +{ + int ret = _kvm_irq_set_level_info(gic_fd, intid, level); + + TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, " + "rc: %i errno: %i", ret, errno); +} + +int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK; + + TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself " + "doesn't allow injecting SGIs. There's no mask for it."); + + if (INTID_IS_PPI(intid)) + irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT; + else + irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT; + + return _kvm_irq_line(vm, irq, level); +} + +void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) +{ + int ret = _kvm_arm_irq_line(vm, intid, level); + + TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", + ret, errno); +} + +static void vgic_poke_irq(int gic_fd, uint32_t intid, + uint32_t vcpu, uint64_t reg_off) +{ + uint64_t reg = intid / 32; + uint64_t index = intid % 32; + uint64_t attr = reg_off + reg * 4; + uint64_t val; + bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid); + + /* Check that the addr part of the attr is within 32 bits. */ + assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK); + + uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + : KVM_DEV_ARM_VGIC_GRP_DIST_REGS; + + if (intid_is_private) { + /* TODO: only vcpu 0 implemented for now. */ + assert(vcpu == 0); + attr += SZ_64K; + } + + /* All calls will succeed, even with invalid intid's, as long as the + * addr part of the attr is within 32 bits (checked above). An invalid + * intid will just make the read/writes point to above the intended + * register space (i.e., ICPENDR after ISPENDR). 
+ */ + kvm_device_access(gic_fd, group, attr, &val, false); + val |= 1ULL << index; + kvm_device_access(gic_fd, group, attr, &val, true); +} + +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); +} + +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu) +{ + vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); +} diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index eac44f5d0db0..13e8e3dcf984 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -157,8 +157,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "memsize of 0,\n" " phdr index: %u p_memsz: 0x%" PRIx64, n1, (uint64_t) phdr.p_memsz); - vm_vaddr_t seg_vstart = phdr.p_vaddr; - seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); + vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size); vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; seg_vend |= vm->page_size - 1; size_t seg_size = seg_vend - seg_vstart + 1; diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index c330f414ef96..8784013b747c 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -4,22 +4,59 @@ */ #include "guest_modes.h" +#ifdef __aarch64__ +#include "processor.h" +enum vm_guest_mode vm_mode_default; +#endif + struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { +#ifndef __aarch64__ guest_mode_append(VM_MODE_DEFAULT, true, true); - -#ifdef __aarch64__ - guest_mode_append(VM_MODE_P40V48_64K, true, true); +#else { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + bool ps4k, ps16k, ps64k; + int i; + + aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); + + vm_mode_default = NUM_VM_MODES; + if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, true, true); + guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, true, true); - guest_mode_append(VM_MODE_P48V48_64K, true, true); + guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); + } + if (limit >= 40) { + guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); + if (ps4k) + vm_mode_default = VM_MODE_P40V48_4K; } + if (limit >= 36) { + guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); + guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); + guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); + guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); + } + + /* + * Pick the first supported IPA size if the default + * isn't available. 
+ */ + for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) { + if (guest_modes[i].supported && guest_modes[i].enabled) + vm_mode_default = i; + } + + TEST_ASSERT(vm_mode_default != NUM_VM_MODES, + "No supported mode!"); } #endif #ifdef __s390x__ @@ -38,6 +75,16 @@ void guest_modes_append_default(void) guest_mode_append(VM_MODE_P47V64_4K, true, true); } #endif +#ifdef __riscv + { + unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); + + if (sz >= 52) + guest_mode_append(VM_MODE_P52V48_4K, true, true); + if (sz >= 48) + guest_mode_append(VM_MODE_P48V48_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 14bb4d5b6bb7..4a645dc77f34 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -22,15 +22,6 @@ static int vcpu_mmap_sz(void); -/* Aligns x up to the next multiple of size. Size must be a power of 2. */ -static void *align(void *x, size_t size) -{ - size_t mask = size - 1; - TEST_ASSERT(size != 0 && !(size & (size - 1)), - "size not a power of 2: %lu", size); - return (void *) (((size_t) x + mask) & ~mask); -} - int open_path_or_exit(const char *path, int flags) { int fd; @@ -94,6 +85,33 @@ int kvm_check_cap(long cap) return ret; } +/* VM Check Capability + * + * Input Args: + * vm - Virtual Machine + * cap - Capability + * + * Output Args: None + * + * Return: + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. + * + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret; + + ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + return ret; +} + /* VM Enable Capability * * Input Args: @@ -175,12 +193,18 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages", + [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages", [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", + [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", + [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages", + [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages", + [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages", + [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -191,15 +215,21 @@ const char *vm_guest_mode_string(uint32_t i) } const struct vm_guest_mode_params vm_guest_mode_params[] = { - { 52, 48, 0x1000, 12 }, - { 52, 48, 0x10000, 16 }, - { 48, 48, 0x1000, 12 }, - { 48, 48, 0x10000, 16 }, - { 40, 48, 0x1000, 12 }, - { 40, 48, 0x10000, 16 }, - { 0, 0, 0x1000, 12 }, - { 47, 64, 0x1000, 12 }, - { 44, 64, 0x1000, 12 }, + [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_64K] = { 
52, 48, 0x10000, 16 }, + [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, + [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, + [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 }, + [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 }, + [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 }, + [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 }, + [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 }, + [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 }, + [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 }, + [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 }, + [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 }, + [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 }, + [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -261,9 +291,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->pgtable_levels = 3; break; case VM_MODE_P40V48_4K: + case VM_MODE_P36V48_4K: vm->pgtable_levels = 4; break; case VM_MODE_P40V48_64K: + case VM_MODE_P36V48_64K: + vm->pgtable_levels = 3; + break; + case VM_MODE_P48V48_16K: + case VM_MODE_P40V48_16K: + case VM_MODE_P36V48_16K: + vm->pgtable_levels = 4; + break; + case VM_MODE_P36V47_16K: vm->pgtable_levels = 3; break; case VM_MODE_PXXV48_4K: @@ -311,7 +351,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); /* Limit physical addresses to PA-bits. */ - vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; + vm->max_gfn = vm_compute_max_gfn(vm); /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); @@ -353,6 +393,11 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, struct kvm_vm *vm; int i; + /* + * Permission needs to be requested before KVM_SET_CPUID2. + */ + vm_xsave_req_perm(); + /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; @@ -879,9 +924,17 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, alignment = 1; #endif + /* + * When using THP mmap is not guaranteed to returned a hugepage aligned + * address so we have to pad the mmap. Padding is not needed for HugeTLB + * because mmap will always return an address aligned to the HugeTLB + * page size. 
+ */ if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); + ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + /* Add enough memory to align up if necessary */ if (alignment > 1) region->mmap_size += alignment; @@ -914,8 +967,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); + TEST_ASSERT(!is_backing_src_hugetlb(src_type) || + region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), + "mmap_start %p is not aligned to HugeTLB page size 0x%lx", + region->mmap_start, backing_src_pagesz); + /* Align host address */ - region->host_mem = align(region->mmap_start, alignment); + region->host_mem = align_ptr_up(region->mmap_start, alignment); /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || @@ -958,7 +1016,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, "mmap of alias failed, errno: %i", errno); /* Align host alias address */ - region->host_alias = align(region->mmap_alias, alignment); + region->host_alias = align_ptr_up(region->mmap_alias, alignment); } } @@ -2083,6 +2141,78 @@ int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, } /* + * IRQ related functions. + */ + +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +{ + struct kvm_irq_level irq_level = { + .irq = irq, + .level = level, + }; + + return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); +} + +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) +{ + int ret = _kvm_irq_line(vm, irq, level); + + TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno); +} + +struct kvm_irq_routing *kvm_gsi_routing_create(void) +{ + struct kvm_irq_routing *routing; + size_t size; + + size = sizeof(struct kvm_irq_routing); + /* Allocate space for the max number of entries: this wastes 196 KBs. 
*/ + size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry); + routing = calloc(1, size); + assert(routing); + + return routing; +} + +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin) +{ + int i; + + assert(routing); + assert(routing->nr < KVM_MAX_IRQ_ROUTES); + + i = routing->nr; + routing->entries[i].gsi = gsi; + routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + routing->entries[i].flags = 0; + routing->entries[i].u.irqchip.irqchip = 0; + routing->entries[i].u.irqchip.pin = pin; + routing->nr++; +} + +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) +{ + int ret; + + assert(routing); + ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing); + free(routing); + + return ret; +} + +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) +{ + int ret; + + ret = _kvm_gsi_routing_write(vm, routing); + TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i", + ret, errno); +} + +/* * VM Dump * * Input Args: @@ -2324,6 +2454,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm) return vm->page_shift; } +unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm) +{ + return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; +} + uint64_t vm_get_max_gfn(struct kvm_vm *vm) { return vm->max_gfn; diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 0ef80dbdc116..722df3a28791 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -10,21 +10,40 @@ struct perf_test_args perf_test_args; -uint64_t guest_test_phys_mem; - /* * Guest virtual memory offset of the testing memory slot. * Must not conflict with identity mapped test code. */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +struct vcpu_thread { + /* The id of the vCPU. */ + int vcpu_id; + + /* The pthread backing the vCPU. */ + pthread_t thread; + + /* Set to true once the vCPU thread is up and running. */ + bool running; +}; + +/* The vCPU threads involved in this test. */ +static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS]; + +/* The function run by each vCPU thread, as provided by the test. */ +static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); + +/* Set to true once all vCPU threads are up and running. */ +static bool all_vcpu_threads_running; + /* * Continuously write to the first 8 bytes of each page in the * specified region. 
*/ static void guest_code(uint32_t vcpu_id) { - struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; uint64_t gva; uint64_t pages; int i; @@ -37,9 +56,9 @@ static void guest_code(uint32_t vcpu_id) while (true) { for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * perf_test_args.guest_page_size); + uint64_t addr = gva + (i * pta->guest_page_size); - if (i % perf_test_args.wr_fract == 0) + if (i % pta->wr_fract == 0) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -49,35 +68,81 @@ static void guest_code(uint32_t vcpu_id) } } +void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, + uint64_t vcpu_memory_bytes, + bool partition_vcpu_memory_access) +{ + struct perf_test_args *pta = &perf_test_args; + struct perf_test_vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &pta->vcpu_args[vcpu_id]; + + vcpu_args->vcpu_id = vcpu_id; + if (partition_vcpu_memory_access) { + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes); + } else { + vcpu_args->gva = guest_test_virt_mem; + vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + pta->guest_page_size; + vcpu_args->gpa = pta->gpa; + } + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_args->gpa, vcpu_args->gpa + + (vcpu_args->pages * pta->guest_page_size)); + } +} + struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, uint64_t vcpu_memory_bytes, int slots, - enum vm_mem_backing_src_type backing_src) + enum vm_mem_backing_src_type backing_src, + bool partition_vcpu_memory_access) { + struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; uint64_t guest_num_pages; + uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - perf_test_args.host_page_size = getpagesize(); - perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size; + /* By default vCPUs will write to memory. */ + pta->wr_fract = 1; + + /* + * Snapshot the non-huge page size. This is used by the guest code to + * access/dirty pages at the logging granularity. + */ + pta->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size); + (vcpus * vcpu_memory_bytes) / pta->guest_page_size); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); - TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0, "Guest memory size is not guest page size aligned."); TEST_ASSERT(guest_num_pages % slots == 0, "Guest memory cannot be evenly divided into %d slots.", slots); + /* + * Pass guest_num_pages to populate the page tables for test memory. + * The memory is also added to memslot 0, but that's a benign side + * effect as KVM allows aliasing HVAs in meslots. 
+ */ vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, - (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, - 0, guest_code, NULL); + guest_num_pages, 0, guest_code, NULL); - perf_test_args.vm = vm; + pta->vm = vm; /* * If there should be more memory in the guest test region than there @@ -90,20 +155,18 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - perf_test_args.guest_page_size; - guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; + pta->gpa = align_down(pta->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); + pta->gpa = align_down(pta->gpa, 1 << 20); #endif - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { uint64_t region_pages = guest_num_pages / slots; - vm_paddr_t region_start = guest_test_phys_mem + - region_pages * perf_test_args.guest_page_size * i; + vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i; vm_userspace_mem_region_add(vm, backing_src, region_start, PERF_TEST_MEM_SLOT_INDEX + i, @@ -111,10 +174,15 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, } /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages); + virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); + + perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); ucall_init(vm, NULL); + /* Export the shared variables to the guest. */ + sync_global_to_guest(vm, perf_test_args); + return vm; } @@ -124,36 +192,60 @@ void perf_test_destroy_vm(struct kvm_vm *vm) kvm_vm_free(vm); } -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, - uint64_t vcpu_memory_bytes, - bool partition_vcpu_memory_access) +void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) +{ + perf_test_args.wr_fract = wr_fract; + sync_global_to_guest(vm, perf_test_args); +} + +static void *vcpu_thread_main(void *data) +{ + struct vcpu_thread *vcpu = data; + + WRITE_ONCE(vcpu->running, true); + + /* + * Wait for all vCPU threads to be up and running before calling the test- + * provided vCPU thread function. This prevents thread creation (which + * requires taking the mmap_sem in write mode) from interfering with the + * guest faulting in its memory. 
+ */ + while (!READ_ONCE(all_vcpu_threads_running)) + ; + + vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]); + + return NULL; +} + +void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)) { - vm_paddr_t vcpu_gpa; - struct perf_test_vcpu_args *vcpu_args; int vcpu_id; + vcpu_thread_fn = vcpu_fn; + WRITE_ONCE(all_vcpu_threads_running, false); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id]; - vcpu_args->vcpu_id = vcpu_id; - if (partition_vcpu_memory_access) { - vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args->pages = vcpu_memory_bytes / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem + - (vcpu_id * vcpu_memory_bytes); - } else { - vcpu_args->gva = guest_test_virt_mem; - vcpu_args->pages = (vcpus * vcpu_memory_bytes) / - perf_test_args.guest_page_size; - vcpu_gpa = guest_test_phys_mem; - } + vcpu->vcpu_id = vcpu_id; + WRITE_ONCE(vcpu->running, false); - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); + } - pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + - (vcpu_args->pages * perf_test_args.guest_page_size)); + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + while (!READ_ONCE(vcpu_threads[vcpu_id].running)) + ; } + + WRITE_ONCE(all_vcpu_threads_running, true); +} + +void perf_test_join_vcpu_threads(int vcpus) +{ + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c new file mode 100644 index 000000000000..d377f2603d98 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V code + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. 
+ */ + +#include <linux/compiler.h> +#include <assert.h> + +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 + +static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size) & ~(vm->page_size - 1); +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) << + PGTBL_PAGE_SIZE_SHIFT; +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return PGTBL_PAGE_SIZE / sizeof(uint64_t); +} + +static uint64_t pte_index_mask[] = { + PGTBL_L0_INDEX_MASK, + PGTBL_L1_INDEX_MASK, + PGTBL_L2_INDEX_MASK, + PGTBL_L3_INDEX_MASK, +}; + +static uint32_t pte_index_shift[] = { + PGTBL_L0_INDEX_SHIFT, + PGTBL_L1_INDEX_SHIFT, + PGTBL_L2_INDEX_SHIFT, + PGTBL_L3_INDEX_SHIFT, +}; + +static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) +{ + TEST_ASSERT(level > -1, + "Negative page table level (%d) not possible", level); + TEST_ASSERT(level < vm->pgtable_levels, + "Invalid page table level (%d)", level); + + return (gva & pte_index_mask[level]) >> pte_index_shift[level]; +} + +void virt_pgd_alloc(struct kvm_vm *vm) +{ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_pages_alloc(vm, + page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + uint64_t *ptep, next_ppn; + int level = vm->pgtable_levels - 1; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8; + if (!*ptep) { + next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_VALID_MASK; + } + level--; + + while (level > -1) { + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + + pte_index(vm, vaddr, level) * 8; + if (!*ptep && level > 0) { + next_ppn = vm_alloc_page_table(vm) >> + PGTBL_PAGE_SIZE_SHIFT; + *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_VALID_MASK; + } + level--; + } + + paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT; + *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) | + PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; +} + +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + int level = vm->pgtable_levels - 1; + + if (!vm->pgd_created) + goto unmapped_gva; + + ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8; + if (!ptep) + goto unmapped_gva; + level--; + + while (level > -1) { + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + + pte_index(vm, gva, level) * 8; + if (!ptep) + goto unmapped_gva; + level--; + } + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + +unmapped_gva: + TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d", + gva, level); + exit(1); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, 
uint8_t indent, + uint64_t page, int level) +{ +#ifdef DEBUG + static const char *const type[] = { "pte", "pmd", "pud", "p4d"}; + uint64_t pte, *ptep; + + if (level < 0) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (!*ptep) + continue; + fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", + type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, + pte_addr(vm, *ptep), level - 1); + } +#endif +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level = vm->pgtable_levels - 1; + uint64_t pgd, *ptep; + + if (!vm->pgd_created) + return; + + for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) { + ptep = addr_gpa2hva(vm, pgd); + if (!*ptep) + continue; + fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", + pgd, *ptep, ptep); + pte_dump(stream, vm, indent + 1, + pte_addr(vm, *ptep), level - 1); + } +} + +void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid) +{ + unsigned long satp; + + /* + * The RISC-V Sv48 MMU mode supports 56-bit physical address + * for 48-bit virtual address with 4KB last level page size. + */ + switch (vm->mode) { + case VM_MODE_P52V48_4K: + case VM_MODE_P48V48_4K: + case VM_MODE_P40V48_4K: + break; + default: + TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + } + + satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; + satp |= SATP_MODE_48; + + set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp); +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct kvm_riscv_core core; + + get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4); + 
get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5); + get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6); + + fprintf(stream, + " MODE: 0x%lx\n", core.mode); + fprintf(stream, + " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n", + core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp); + fprintf(stream, + " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n", + core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2); + fprintf(stream, + " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n", + core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1); + fprintf(stream, + " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n", + core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5); + fprintf(stream, + " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n", + core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3); + fprintf(stream, + " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n", + core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7); + fprintf(stream, + " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n", + core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11); + fprintf(stream, + " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n", + core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6); +} + +static void guest_hang(void) +{ + while (1) + ; +} + +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + int r; + size_t stack_size = vm->page_size == 4096 ? + DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); + unsigned long current_gp = 0; + struct kvm_mp_state mps; + + vm_vcpu_add(vm, vcpuid); + riscv_vcpu_mmu_setup(vm, vcpuid); + + /* + * With SBI HSM support in KVM RISC-V, all secondary VCPUs are + * powered-off by default so we ensure that all secondary VCPUs + * are powered-on using KVM_SET_MP_STATE ioctl(). + */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps); + TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); + + /* Setup global pointer of guest to be same as the host */ + asm volatile ( + "add %0, gp, zero" : "=r" (current_gp) : : "memory"); + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp); + + /* Setup stack pointer and program counter of guest */ + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), + stack_vaddr + stack_size); + set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), + (unsigned long)guest_code); + + /* Setup default exception vector of guest */ + set_reg(vm, vcpuid, RISCV_CSR_REG(stvec), + (unsigned long)guest_hang); +} + +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) 
+{ + va_list ap; + uint64_t id = RISCV_CORE_REG(regs.a0); + int i; + + TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" + " num: %u\n", num); + + va_start(ap, num); + + for (i = 0; i < num; i++) { + switch (i) { + case 0: + id = RISCV_CORE_REG(regs.a0); + break; + case 1: + id = RISCV_CORE_REG(regs.a1); + break; + case 2: + id = RISCV_CORE_REG(regs.a2); + break; + case 3: + id = RISCV_CORE_REG(regs.a3); + break; + case 4: + id = RISCV_CORE_REG(regs.a4); + break; + case 5: + id = RISCV_CORE_REG(regs.a5); + break; + case 6: + id = RISCV_CORE_REG(regs.a6); + break; + case 7: + id = RISCV_CORE_REG(regs.a7); + break; + }; + set_reg(vm, vcpuid, id, va_arg(ap, uint64_t)); + } + + va_end(ap); +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c new file mode 100644 index 000000000000..9e42d8248fa6 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2021 Western Digital Corporation or its affiliates. + */ + +#include <linux/kvm.h> + +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +void ucall_init(struct kvm_vm *vm, void *arg) +{ +} + +void ucall_uninit(struct kvm_vm *vm) +{ +} + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + struct sbiret ret; + + asm volatile ( + "ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? 
nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc, + 0, 0, 0, 0, 0); +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (uc) + memset(uc, 0, sizeof(*uc)); + + if (run->exit_reason == KVM_EXIT_RISCV_SBI && + run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT && + run->riscv_sbi.function_id == 0) { + memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]), + sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b72429108993..6d23878bbfe1 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -283,6 +283,11 @@ size_t get_backing_src_pagesz(uint32_t i) } } +bool is_backing_src_hugetlb(uint32_t i) +{ + return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB); +} + static void print_available_backing_src_types(const char *prefix) { int i; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 82c39db91369..babb0f28575c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -650,6 +650,45 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid) vcpu_sregs_set(vm, vcpuid, &sregs); } +#define CPUID_XFD_BIT (1 << 4) +static bool is_xfd_supported(void) +{ + int eax, ebx, ecx, edx; + const int leaf = 0xd, subleaf = 0x1; + + __asm__ __volatile__( + "cpuid" + : /* output */ "=a"(eax), "=b"(ebx), + "=c"(ecx), "=d"(edx) + : /* input */ "0"(leaf), "2"(subleaf)); + + return !!(eax & CPUID_XFD_BIT); +} + +void vm_xsave_req_perm(void) +{ + unsigned long bitmask; + long rc; + + if (!is_xfd_supported()) + return; + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, + XSTATE_XTILE_DATA_BIT); + /* + * The older kernel version(<5.15) can't support + * ARCH_REQ_XCOMP_GUEST_PERM and directly return. 
+ */ + if (rc) + return; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); + TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); + TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", + bitmask); +} + void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) { struct kvm_mp_state mp_state; @@ -1017,21 +1056,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) sregs_dump(stream, &sregs, indent + 4); } -struct kvm_x86_state { - struct kvm_vcpu_events events; - struct kvm_mp_state mp_state; - struct kvm_regs regs; - struct kvm_xsave xsave; - struct kvm_xcrs xcrs; - struct kvm_sregs sregs; - struct kvm_debugregs debugregs; - union { - struct kvm_nested_state nested; - char nested_[16384]; - }; - struct kvm_msrs msrs; -}; - static int kvm_get_num_msrs_fd(int kvm_fd) { struct kvm_msr_list nmsrs; @@ -1069,6 +1093,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void) return list; } +static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu, + struct kvm_x86_state *state) +{ + int size; + + size = vm_check_cap(vm, KVM_CAP_XSAVE2); + if (!size) + size = sizeof(struct kvm_xsave); + + state->xsave = malloc(size); + if (size == sizeof(struct kvm_xsave)) + return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave); + else + return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave); +} + struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1112,7 +1152,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", r); - r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave); + r = vcpu_save_xsave_state(vm, vcpu, state); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", r); @@ -1157,24 +1197,25 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s struct vcpu *vcpu = vcpu_find(vm, vcpuid); int r; - r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", + r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", r); + r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); + TEST_ASSERT(r == state->msrs.nmsrs, + "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", + r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); + if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", r); } - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", + r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? 
-1 : state->msrs.entries[r].index); - r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", r); @@ -1198,6 +1239,12 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s } } +void kvm_x86_state_cleanup(struct kvm_x86_state *state) +{ + free(state->xsave); + free(state); +} + bool is_intel_cpu(void) { int eax, ebx, ecx, edx; @@ -1431,3 +1478,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } + +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 +#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 + +static inline unsigned x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm) +{ + const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ + unsigned long ht_gfn, max_gfn, max_pfn; + uint32_t eax, ebx, ecx, edx, max_ext_leaf; + + max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + + /* Avoid reserved HyperTransport region on AMD processors. */ + eax = ecx = 0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || + ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || + edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + return max_gfn; + + /* On parts with <40 physical address bits, the area is fully hidden */ + if (vm->pa_bits < 40) + return max_gfn; + + /* Before family 17h, the HyperTransport area is just below 1T. */ + ht_gfn = (1 << 28) - num_ht_pages; + eax = 1; + cpuid(&eax, &ebx, &ecx, &edx); + if (x86_family(eax) < 0x17) + goto done; + + /* + * Otherwise it's at the top of the physical address space, possibly + * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use + * the old conservative value if MAXPHYADDR is not enumerated. + */ + eax = 0x80000000; + cpuid(&eax, &ebx, &ecx, &edx); + max_ext_leaf = eax; + if (max_ext_leaf < 0x80000008) + goto done; + + eax = 0x80000008; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; + if (max_ext_leaf >= 0x8000001f) { + eax = 0x8000001f; + cpuid(&eax, &ebx, &ecx, &edx); + max_pfn >>= (ebx >> 6) & 0x3f; + } + + ht_gfn = max_pfn - num_ht_pages; +done: + return min(max_gfn, ht_gfn - 1); +} |