diff options
Diffstat (limited to 'tools')
192 files changed, 6813 insertions, 2340 deletions
diff --git a/tools/arch/x86/dell-uart-backlight-emulator/.gitignore b/tools/arch/x86/dell-uart-backlight-emulator/.gitignore new file mode 100644 index 000000000000..5c8cad8d72b9 --- /dev/null +++ b/tools/arch/x86/dell-uart-backlight-emulator/.gitignore @@ -0,0 +1 @@ +dell-uart-backlight-emulator diff --git a/tools/arch/x86/dell-uart-backlight-emulator/Makefile b/tools/arch/x86/dell-uart-backlight-emulator/Makefile new file mode 100644 index 000000000000..6ea1d9fd534b --- /dev/null +++ b/tools/arch/x86/dell-uart-backlight-emulator/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for Intel Software Defined Silicon provisioning tool + +dell-uart-backlight-emulator: dell-uart-backlight-emulator.c + +BINDIR ?= /usr/bin + +override CFLAGS += -O2 -Wall + +%: %.c + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +.PHONY : clean +clean : + @rm -f dell-uart-backlight-emulator + +install : dell-uart-backlight-emulator + install -d $(DESTDIR)$(BINDIR) + install -m 755 -p dell-uart-backlight-emulator $(DESTDIR)$(BINDIR)/dell-uart-backlight-emulator diff --git a/tools/arch/x86/dell-uart-backlight-emulator/README b/tools/arch/x86/dell-uart-backlight-emulator/README new file mode 100644 index 000000000000..c0d8e52046ee --- /dev/null +++ b/tools/arch/x86/dell-uart-backlight-emulator/README @@ -0,0 +1,46 @@ +Emulator for DELL0501 UART attached backlight controller +-------------------------------------------------------- + +Dell All In One (AIO) models released after 2017 use a backlight controller +board connected to an UART. + +In DSDT this uart port will be defined as: + + Name (_HID, "DELL0501") + Name (_CID, EisaId ("PNP0501") + +With the DELL0501 indicating that we are dealing with an UART with +the backlight controller board attached. + +This small emulator allows testing +the drivers/platform/x86/dell/dell-uart-backlight.c driver without access +to an actual Dell All In One. + +This requires: +1. A (desktop) PC with a 16550 UART on the motherboard and a standard DB9 + connector connected to this UART. +2. A DB9 NULL modem cable. +3. A second DB9 serial port, this can e.g. be a USB to serial converter + with a DB9 connector plugged into the same desktop PC. +4. A DSDT overlay for the desktop PC replacing the _HID of the 16550 UART + ACPI Device()Â with "DELL0501" and adding a _CID of "PNP0501", see + DSDT.patch for an example of the necessary DSDT changes. + +With everything setup and the NULL modem cable connected between +the 2 serial ports run: + +./dell-uart-backlight-emulator <path-to-/dev/tty*S#-for-second-port> + +For example when using an USB to serial converter for the second port: + +./dell-uart-backlight-emulator /dev/ttyUSB0 + +And then (re)load the dell-uart-backlight driver: + +sudo rmmod dell-uart-backlight; sudo modprobe dell-uart-backlight dyndbg + +After this check "dmesg" to see if the driver correctly received +the firmware version string from the emulator. If this works there +should be a /sys/class/backlight/dell_uart_backlight/ directory now +and writes to the brightness or bl_power files should be reflected +by matching output from the emulator. diff --git a/tools/arch/x86/dell-uart-backlight-emulator/dell-uart-backlight-emulator.c b/tools/arch/x86/dell-uart-backlight-emulator/dell-uart-backlight-emulator.c new file mode 100644 index 000000000000..655b6c96d8cf --- /dev/null +++ b/tools/arch/x86/dell-uart-backlight-emulator/dell-uart-backlight-emulator.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Dell AIO Serial Backlight board emulator for testing + * the Linux dell-uart-backlight driver. + * + * Copyright (C) 2024 Hans de Goede <hansg@kernel.org> + */ +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <termios.h> +#include <unistd.h> + +int serial_fd; +int brightness = 50; + +static unsigned char dell_uart_checksum(unsigned char *buf, int len) +{ + unsigned char val = 0; + + while (len-- > 0) + val += buf[len]; + + return val ^ 0xff; +} + +/* read() will return -1 on SIGINT / SIGTERM causing the mainloop to cleanly exit */ +void signalhdlr(int signum) +{ +} + +int main(int argc, char *argv[]) +{ + struct sigaction sigact = { .sa_handler = signalhdlr }; + unsigned char buf[4], csum, response[32]; + const char *version_str = "PHI23-V321"; + struct termios tty, saved_tty; + int ret, idx, len = 0; + + if (argc != 2) { + fprintf(stderr, "Invalid or missing arguments\n"); + fprintf(stderr, "Usage: %s <serial-port>\n", argv[0]); + return 1; + } + + serial_fd = open(argv[1], O_RDWR | O_NOCTTY); + if (serial_fd == -1) { + fprintf(stderr, "Error opening %s: %s\n", argv[1], strerror(errno)); + return 1; + } + + ret = tcgetattr(serial_fd, &tty); + if (ret == -1) { + fprintf(stderr, "Error getting tcattr: %s\n", strerror(errno)); + goto out_close; + } + saved_tty = tty; + + cfsetspeed(&tty, 9600); + cfmakeraw(&tty); + tty.c_cflag &= ~CSTOPB; + tty.c_cflag &= ~CRTSCTS; + tty.c_cflag |= CLOCAL | CREAD; + + ret = tcsetattr(serial_fd, TCSANOW, &tty); + if (ret == -1) { + fprintf(stderr, "Error setting tcattr: %s\n", strerror(errno)); + goto out_restore; + } + + sigaction(SIGINT, &sigact, 0); + sigaction(SIGTERM, &sigact, 0); + + idx = 0; + while (read(serial_fd, &buf[idx], 1) == 1) { + if (idx == 0) { + switch (buf[0]) { + /* 3 MSB bits: cmd-len + 01010 SOF marker */ + case 0x6a: len = 3; break; + case 0x8a: len = 4; break; + default: + fprintf(stderr, "Error unexpected first byte: 0x%02x\n", buf[0]); + continue; /* Try to sync up with sender */ + } + } + + /* Process msg when len bytes have been received */ + if (idx != (len - 1)) { + idx++; + continue; + } + + /* Reset idx for next command */ + idx = 0; + + csum = dell_uart_checksum(buf, len - 1); + if (buf[len - 1] != csum) { + fprintf(stderr, "Error checksum mismatch got 0x%02x expected 0x%02x\n", + buf[len - 1], csum); + continue; + } + + switch ((buf[0] << 8) | buf[1]) { + case 0x6a06: /* cmd = 0x06, get version */ + len = strlen(version_str); + strcpy((char *)&response[2], version_str); + printf("Get version, reply: %s\n", version_str); + break; + case 0x8a0b: /* cmd = 0x0b, set brightness */ + if (buf[2] > 100) { + fprintf(stderr, "Error invalid brightness param: %d\n", buf[2]); + continue; + } + + len = 0; + brightness = buf[2]; + printf("Set brightness %d\n", brightness); + break; + case 0x6a0c: /* cmd = 0x0c, get brightness */ + len = 1; + response[2] = brightness; + printf("Get brightness, reply: %d\n", brightness); + break; + case 0x8a0e: /* cmd = 0x0e, set backlight power */ + if (buf[2] != 0 && buf[2] != 1) { + fprintf(stderr, "Error invalid set power param: %d\n", buf[2]); + continue; + } + + len = 0; + printf("Set power %d\n", buf[2]); + break; + default: + fprintf(stderr, "Error unknown cmd 0x%04x\n", + (buf[0] << 8) | buf[1]); + continue; + } + + /* Respond with <total-len> <cmd> <data...> <csum> */ + response[0] = len + 3; /* response length in bytes */ + response[1] = buf[1]; /* ack cmd */ + csum = dell_uart_checksum(response, len + 2); + response[len + 2] = csum; + ret = write(serial_fd, response, response[0]); + if (ret != (response[0])) + fprintf(stderr, "Error writing %d bytes: %d\n", + response[0], ret); + } + + ret = 0; +out_restore: + tcsetattr(serial_fd, TCSANOW, &saved_tty); +out_close: + close(serial_fd); + return ret; +} diff --git a/tools/arch/x86/intel_sdsi/intel_sdsi.c b/tools/arch/x86/intel_sdsi/intel_sdsi.c index 2cd92761f171..766a5d26f534 100644 --- a/tools/arch/x86/intel_sdsi/intel_sdsi.c +++ b/tools/arch/x86/intel_sdsi/intel_sdsi.c @@ -43,7 +43,7 @@ #define METER_CERT_MAX_SIZE 4096 #define STATE_MAX_NUM_LICENSES 16 #define STATE_MAX_NUM_IN_BUNDLE (uint32_t)8 -#define METER_MAX_NUM_BUNDLES 8 +#define FEAT_LEN 5 /* 4 plus NUL terminator */ #define __round_mask(x, y) ((__typeof__(x))((y) - 1)) #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) @@ -154,11 +154,12 @@ struct bundle_encoding { }; struct meter_certificate { - uint32_t block_signature; - uint32_t counter_unit; + uint32_t signature; + uint32_t version; uint64_t ppin; + uint32_t counter_unit; uint32_t bundle_length; - uint32_t reserved; + uint64_t reserved; uint32_t mmrc_encoding; uint32_t mmrc_counter; }; @@ -167,6 +168,11 @@ struct bundle_encoding_counter { uint32_t encoding; uint32_t counter; }; +#define METER_BUNDLE_SIZE sizeof(struct bundle_encoding_counter) +#define BUNDLE_COUNT(length) ((length) / METER_BUNDLE_SIZE) +#define METER_MAX_NUM_BUNDLES \ + ((METER_CERT_MAX_SIZE - sizeof(struct meter_certificate)) / \ + sizeof(struct bundle_encoding_counter)) struct sdsi_dev { struct sdsi_regs regs; @@ -179,6 +185,7 @@ struct sdsi_dev { enum command { CMD_SOCKET_INFO, CMD_METER_CERT, + CMD_METER_CURRENT_CERT, CMD_STATE_CERT, CMD_PROV_AKC, CMD_PROV_CAP, @@ -316,24 +323,27 @@ static char *content_type(uint32_t type) } } -static void get_feature(uint32_t encoding, char *feature) +static void get_feature(uint32_t encoding, char feature[5]) { char *name = (char *)&encoding; + feature[4] = '\0'; feature[3] = name[0]; feature[2] = name[1]; feature[1] = name[2]; feature[0] = name[3]; } -static int sdsi_meter_cert_show(struct sdsi_dev *s) +static int sdsi_meter_cert_show(struct sdsi_dev *s, bool show_current) { char buf[METER_CERT_MAX_SIZE] = {0}; struct bundle_encoding_counter *bec; struct meter_certificate *mc; uint32_t count = 0; FILE *cert_ptr; + char *cert_fname; int ret, size; + char name[FEAT_LEN]; ret = sdsi_update_registers(s); if (ret) @@ -341,7 +351,6 @@ static int sdsi_meter_cert_show(struct sdsi_dev *s) if (!s->regs.en_features.sdsi) { fprintf(stderr, "SDSi feature is present but not enabled.\n"); - fprintf(stderr, " Unable to read meter certificate\n"); return -1; } @@ -356,15 +365,17 @@ static int sdsi_meter_cert_show(struct sdsi_dev *s) return ret; } - cert_ptr = fopen("meter_certificate", "r"); + cert_fname = show_current ? "meter_current" : "meter_certificate"; + cert_ptr = fopen(cert_fname, "r"); + if (!cert_ptr) { - perror("Could not open 'meter_certificate' file"); + fprintf(stderr, "Could not open '%s' file: %s", cert_fname, strerror(errno)); return -1; } size = fread(buf, 1, sizeof(buf), cert_ptr); if (!size) { - fprintf(stderr, "Could not read 'meter_certificate' file\n"); + fprintf(stderr, "Could not read '%s' file\n", cert_fname); fclose(cert_ptr); return -1; } @@ -375,32 +386,39 @@ static int sdsi_meter_cert_show(struct sdsi_dev *s) printf("\n"); printf("Meter certificate for device %s\n", s->dev_name); printf("\n"); - printf("Block Signature: 0x%x\n", mc->block_signature); - printf("Count Unit: %dms\n", mc->counter_unit); - printf("PPIN: 0x%lx\n", mc->ppin); - printf("Feature Bundle Length: %d\n", mc->bundle_length); - printf("MMRC encoding: %d\n", mc->mmrc_encoding); - printf("MMRC counter: %d\n", mc->mmrc_counter); - if (mc->bundle_length % 8) { + + get_feature(mc->signature, name); + printf("Signature: %s\n", name); + + printf("Version: %d\n", mc->version); + printf("Count Unit: %dms\n", mc->counter_unit); + printf("PPIN: 0x%lx\n", mc->ppin); + printf("Feature Bundle Length: %d\n", mc->bundle_length); + + get_feature(mc->mmrc_encoding, name); + printf("MMRC encoding: %s\n", name); + + printf("MMRC counter: %d\n", mc->mmrc_counter); + if (mc->bundle_length % METER_BUNDLE_SIZE) { fprintf(stderr, "Invalid bundle length\n"); return -1; } - if (mc->bundle_length > METER_MAX_NUM_BUNDLES * 8) { - fprintf(stderr, "More than %d bundles: %d\n", - METER_MAX_NUM_BUNDLES, mc->bundle_length / 8); + if (mc->bundle_length > METER_MAX_NUM_BUNDLES * METER_BUNDLE_SIZE) { + fprintf(stderr, "More than %ld bundles: actual %ld\n", + METER_MAX_NUM_BUNDLES, BUNDLE_COUNT(mc->bundle_length)); return -1; } - bec = (void *)(mc) + sizeof(mc); + bec = (struct bundle_encoding_counter *)(mc + 1); - printf("Number of Feature Counters: %d\n", mc->bundle_length / 8); - while (count++ < mc->bundle_length / 8) { - char feature[5]; + printf("Number of Feature Counters: %ld\n", BUNDLE_COUNT(mc->bundle_length)); + while (count < BUNDLE_COUNT(mc->bundle_length)) { + char feature[FEAT_LEN]; - feature[4] = '\0'; get_feature(bec[count].encoding, feature); printf(" %s: %d\n", feature, bec[count].counter); + ++count; } return 0; @@ -480,7 +498,7 @@ static int sdsi_state_cert_show(struct sdsi_dev *s) sizeof(*lki) + // size of the license key info offset; // offset to this blob content struct bundle_encoding *bundle = (void *)(lbc) + sizeof(*lbc); - char feature[5]; + char feature[FEAT_LEN]; uint32_t i; printf(" Blob %d:\n", count - 1); @@ -493,8 +511,6 @@ static int sdsi_state_cert_show(struct sdsi_dev *s) printf(" Blob revision ID: %u\n", lbc->rev_id); printf(" Number of Features: %u\n", lbc->num_bundles); - feature[4] = '\0'; - for (i = 0; i < min(lbc->num_bundles, STATE_MAX_NUM_IN_BUNDLE); i++) { get_feature(bundle[i].encoding, feature); printf(" Feature %d: %s\n", i, feature); @@ -725,7 +741,7 @@ static void sdsi_free_dev(struct sdsi_dev *s) static void usage(char *prog) { - printf("Usage: %s [-l] [-d DEVNO [-i] [-s] [-m] [-a FILE] [-c FILE]]\n", prog); + printf("Usage: %s [-l] [-d DEVNO [-i] [-s] [-m | -C] [-a FILE] [-c FILE]\n", prog); } static void show_help(void) @@ -734,8 +750,9 @@ static void show_help(void) printf(" %-18s\t%s\n", "-l, --list", "list available On Demand devices"); printf(" %-18s\t%s\n", "-d, --devno DEVNO", "On Demand device number"); printf(" %-18s\t%s\n", "-i, --info", "show socket information"); - printf(" %-18s\t%s\n", "-s, --state", "show state certificate"); - printf(" %-18s\t%s\n", "-m, --meter", "show meter certificate"); + printf(" %-18s\t%s\n", "-s, --state", "show state certificate data"); + printf(" %-18s\t%s\n", "-m, --meter", "show meter certificate data"); + printf(" %-18s\t%s\n", "-C, --meter_current", "show live unattested meter data"); printf(" %-18s\t%s\n", "-a, --akc FILE", "provision socket with AKC FILE"); printf(" %-18s\t%s\n", "-c, --cap FILE>", "provision socket with CAP FILE"); } @@ -751,21 +768,22 @@ int main(int argc, char *argv[]) int option_index = 0; static struct option long_options[] = { - {"akc", required_argument, 0, 'a'}, - {"cap", required_argument, 0, 'c'}, - {"devno", required_argument, 0, 'd'}, - {"help", no_argument, 0, 'h'}, - {"info", no_argument, 0, 'i'}, - {"list", no_argument, 0, 'l'}, - {"meter", no_argument, 0, 'm'}, - {"state", no_argument, 0, 's'}, - {0, 0, 0, 0 } + {"akc", required_argument, 0, 'a'}, + {"cap", required_argument, 0, 'c'}, + {"devno", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"info", no_argument, 0, 'i'}, + {"list", no_argument, 0, 'l'}, + {"meter", no_argument, 0, 'm'}, + {"meter_current", no_argument, 0, 'C'}, + {"state", no_argument, 0, 's'}, + {0, 0, 0, 0 } }; progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+a:c:d:hilms", long_options, + while ((opt = getopt_long_only(argc, argv, "+a:c:d:hilmCs", long_options, &option_index)) != -1) { switch (opt) { case 'd': @@ -781,6 +799,9 @@ int main(int argc, char *argv[]) case 'm': command = CMD_METER_CERT; break; + case 'C': + command = CMD_METER_CURRENT_CERT; + break; case 's': command = CMD_STATE_CERT; break; @@ -819,7 +840,10 @@ int main(int argc, char *argv[]) ret = sdsi_read_reg(s); break; case CMD_METER_CERT: - ret = sdsi_meter_cert_show(s); + ret = sdsi_meter_cert_show(s, false); + break; + case CMD_METER_CURRENT_CERT: + ret = sdsi_meter_cert_show(s, true); break; case CMD_STATE_CERT: ret = sdsi_state_cert_show(s); diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 2190adbe3002..ea32b101b999 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1221,7 +1221,7 @@ struct kvm_vfio_spapr_tce { /* Available with KVM_CAP_SPAPR_RESIZE_HPT */ #define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) #define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) -/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_MMU_HASH_V3 */ #define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) /* Available with KVM_CAP_PPC_RADIX_MMU */ #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index d865dc1f89ee..5899c27c2e2e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -16,9 +16,9 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.18"; +static const char *version_str = "v1.19"; -static const int supported_api_ver = 2; +static const int supported_api_ver = 3; static struct isst_if_platform_info isst_platform_info; static char *progname; static int debug_flag; @@ -46,6 +46,8 @@ static int force_online_offline; static int auto_mode; static int fact_enable_fail; static int cgroupv2; +static int max_die_id; +static int max_punit_id; /* clos related */ static int current_clos = -1; @@ -562,6 +564,18 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void } for (i = 0; i < MAX_PACKAGE_COUNT; i++) { + if (max_die_id == max_punit_id) { + for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) { + id.cpu = cpus[i][k][k]; + id.pkg = i; + id.die = k; + id.punit = k; + if (isst_is_punit_valid(&id)) + callback(&id, arg1, arg2, arg3, arg4); + } + continue; + } + for (j = 0; j < MAX_DIE_PER_PACKAGE; j++) { /* * Fix me: @@ -795,6 +809,12 @@ static void create_cpu_map(void) cpu_cnt[pkg_id][die_id][punit_id]++; + if (max_die_id < die_id) + max_die_id = die_id; + + if (max_punit_id < cpu_map[i].punit_id) + max_punit_id = cpu_map[i].punit_id; + debug_printf( "map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n", i, cpu_map[i].core_id, cpu_map[i].die_id, @@ -2054,6 +2074,7 @@ static void dump_fact_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, struct isst_fact_info fact_info; int ret; + memset(&fact_info, 0, sizeof(fact_info)); ret = isst_get_fact_info(id, tdp_level, fact_bucket, &fact_info); if (ret) { isst_display_error_info_message(1, "Failed to get turbo-freq info at this level", 1, tdp_level); diff --git a/tools/power/x86/intel-speed-select/isst-core-mbox.c b/tools/power/x86/intel-speed-select/isst-core-mbox.c index 24bea57f4ff5..c81ecd602bcf 100644 --- a/tools/power/x86/intel-speed-select/isst-core-mbox.c +++ b/tools/power/x86/intel-speed-select/isst-core-mbox.c @@ -746,6 +746,7 @@ static int mbox_set_pbf_fact_status(struct isst_id *id, int pbf, int enable) static int _get_fact_bucket_info(struct isst_id *id, int level, struct isst_fact_bucket_info *bucket_info) { + int trl_max_levels = isst_get_trl_max_levels(); unsigned int resp; int i, k, ret; @@ -769,7 +770,7 @@ static int _get_fact_bucket_info(struct isst_id *id, int level, } } - for (k = 0; k < 3; ++k) { + for (k = 0; k < trl_max_levels; ++k) { for (i = 0; i < 2; ++i) { int j; diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c index 3458768562e5..32ea70c7dbd8 100644 --- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c +++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c @@ -194,8 +194,14 @@ static int tpmi_get_ctdp_control(struct isst_id *id, int config_index, if (!(info.level_mask & level_mask)) return -1; - ctdp_level->fact_support = info.sst_tf_support; - ctdp_level->pbf_support = info.sst_bf_support; + if (api_version() > 2) { + ctdp_level->fact_support = info.sst_tf_support & BIT(config_index); + ctdp_level->pbf_support = info.sst_bf_support & BIT(config_index); + } else { + ctdp_level->fact_support = info.sst_tf_support; + ctdp_level->pbf_support = info.sst_bf_support; + } + ctdp_level->fact_enabled = !!(info.feature_state & BIT(1)); ctdp_level->pbf_enabled = !!(info.feature_state & BIT(0)); diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index f55fef4c13a7..05efffbca3b7 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -23,6 +23,7 @@ int isst_set_platform_ops(int api_version) isst_ops = mbox_get_platform_ops(); break; case 2: + case 3: isst_ops = tpmi_get_platform_ops(); break; default: diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 14c9b037859a..07ebd08f3202 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -172,12 +172,19 @@ static int print_package_info(struct isst_id *id, FILE *outf) int level = 1; if (out_format_is_json()) { - if (api_version() > 1) - snprintf(header, sizeof(header), "package-%d:die-%d:powerdomain-%d:cpu-%d", - id->pkg, id->die, id->punit, id->cpu); - else + if (api_version() > 1) { + if (id->cpu < 0) + snprintf(header, sizeof(header), + "package-%d:die-%d:powerdomain-%d:cpu-None", + id->pkg, id->die, id->punit); + else + snprintf(header, sizeof(header), + "package-%d:die-%d:powerdomain-%d:cpu-%d", + id->pkg, id->die, id->punit, id->cpu); + } else { snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d", id->pkg, id->die, id->cpu); + } format_and_print(outf, level, header, NULL); return 1; } @@ -189,7 +196,12 @@ static int print_package_info(struct isst_id *id, FILE *outf) snprintf(header, sizeof(header), "powerdomain-%d", id->punit); format_and_print(outf, level++, header, NULL); } - snprintf(header, sizeof(header), "cpu-%d", id->cpu); + + if (id->cpu < 0) + snprintf(header, sizeof(header), "cpu-None"); + else + snprintf(header, sizeof(header), "cpu-%d", id->cpu); + format_and_print(outf, level, header, NULL); return level; @@ -199,8 +211,8 @@ static void _isst_pbf_display_information(struct isst_id *id, FILE *outf, int le struct isst_pbf_info *pbf_info, int disp_level) { - char header[256]; - char value[512]; + static char header[256]; + static char value[1024]; snprintf(header, sizeof(header), "speed-select-base-freq-properties"); format_and_print(outf, disp_level, header, NULL); @@ -338,8 +350,8 @@ void isst_ctdp_display_core_info(struct isst_id *id, FILE *outf, char *prefix, void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level, struct isst_pkg_ctdp *pkg_dev) { - char header[256]; - char value[512]; + static char header[256]; + static char value[1024]; static int level; int trl_max_levels = isst_get_trl_max_levels(); int i; diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 4bddd3c66bf7..39ee75677c2c 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -80,7 +80,7 @@ #define DISP_FREQ_MULTIPLIER 100 #define MAX_PACKAGE_COUNT 32 -#define MAX_DIE_PER_PACKAGE 2 +#define MAX_DIE_PER_PACKAGE 16 #define MAX_PUNIT_PER_DIE 8 /* Unified structure to specific a CPU or a Power Domain */ diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph new file mode 100755 index 000000000000..57d78f6df041 --- /dev/null +++ b/tools/sound/dapm-graph @@ -0,0 +1,303 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate a graph of the current DAPM state for an audio card +# +# Copyright 2024 Bootlin +# Author: Luca Ceresoli <luca.ceresol@bootlin.com> + +set -eu + +STYLE_NODE_ON="shape=box,style=bold,color=green4" +STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" + +# Print usage and exit +# +# $1 = exit return value +# $2 = error string (required if $1 != 0) +usage() +{ + if [ "${1}" -ne 0 ]; then + echo "${2}" >&2 + fi + + echo " +Generate a graph of the current DAPM state for an audio card. + +The DAPM state can be obtained via debugfs for a card on the local host or +a remote target, or from a local copy of the debugfs tree for the card. + +Usage: + $(basename $0) [options] -c CARD - Local sound card + $(basename $0) [options] -c CARD -r REMOTE_TARGET - Card on remote system + $(basename $0) [options] -d STATE_DIR - Local directory + +Options: + -c CARD Sound card to get DAPM state of + -r REMOTE_TARGET Get DAPM state from REMOTE_TARGET via SSH and SCP + instead of using a local sound card + -d STATE_DIR Get DAPM state from a local copy of a debugfs tree + -o OUT_FILE Output file (default: dapm.dot) + -D Show verbose debugging info + -h Print this help and exit + +The output format is implied by the extension of OUT_FILE: + + * Use the .dot extension to generate a text graph representation in + graphviz dot syntax. + * Any other extension is assumed to be a format supported by graphviz for + rendering, e.g. 'png', 'svg', and will produce both the .dot file and a + picture from it. This requires the 'dot' program from the graphviz + package. +" + + exit ${1} +} + +# Connect to a remote target via SSH, collect all DAPM files from debufs +# into a tarball and get the tarball via SCP into $3/dapm.tar +# +# $1 = target as used by ssh and scp, e.g. "root@192.168.1.1" +# $2 = sound card name +# $3 = temp dir path (present on the host, created on the target) +# $4 = local directory to extract the tarball into +# +# Requires an ssh+scp server, find and tar+gz on the target +# +# Note: the tarball is needed because plain 'scp -r' from debugfs would +# copy only empty files +grab_remote_files() +{ + echo "Collecting DAPM state from ${1}" + dbg_echo "Collected DAPM state in ${3}" + + ssh "${1}" " +set -eu && +cd \"/sys/kernel/debug/asoc/${2}\" && +find * -type d -exec mkdir -p ${3}/dapm-tree/{} \; && +find * -type f -exec cp \"{}\" \"${3}/dapm-tree/{}\" \; && +cd ${3}/dapm-tree && +tar cf ${3}/dapm.tar ." + scp -q "${1}:${3}/dapm.tar" "${3}" + + mkdir -p "${4}" + tar xf "${tmp_dir}/dapm.tar" -C "${4}" +} + +# Parse a widget file and generate graph description in graphviz dot format +# +# Skips any file named "bias_level". +# +# $1 = temporary work dir +# $2 = component name +# $3 = widget filename +process_dapm_widget() +{ + local tmp_dir="${1}" + local c_name="${2}" + local w_file="${3}" + local dot_file="${tmp_dir}/main.dot" + local links_file="${tmp_dir}/links.dot" + + local w_name="$(basename "${w_file}")" + local w_tag="${c_name}_${w_name}" + + if [ "${w_name}" = "bias_level" ]; then + return 0 + fi + + dbg_echo " + Widget: ${w_name}" + + cat "${w_file}" | ( + read line + + if echo "${line}" | grep -q ': On ' + then local node_style="${STYLE_NODE_ON}" + else local node_style="${STYLE_NODE_OFF}" + fi + + local w_type="" + while read line; do + # Collect widget type if present + if echo "${line}" | grep -q '^widget-type '; then + local w_type_raw="$(echo "$line" | cut -d ' ' -f 2)" + dbg_echo " - Widget type: ${w_type_raw}" + + # Note: escaping '\n' is tricky to get working with both + # bash and busybox ash, so use a '%' here and replace it + # later + local w_type="%n[${w_type_raw}]" + fi + + # Collect any links. We could use "in" links or "out" links, + # let's use "in" links + if echo "${line}" | grep -q '^in '; then + local w_src=$(echo "$line" | + awk -F\" '{print $6 "_" $4}' | + sed 's/^(null)_/ROOT_/') + dbg_echo " - Input route from: ${w_src}" + echo " \"${w_src}\" -> \"$w_tag\"" >> "${links_file}" + fi + done + + echo " \"${w_tag}\" [label=\"${w_name}${w_type}\",${node_style}]" | + tr '%' '\\' >> "${dot_file}" + ) +} + +# Parse the DAPM tree for a sound card component and generate graph +# description in graphviz dot format +# +# $1 = temporary work dir +# $2 = component directory +# $3 = forced component name (extracted for path if empty) +process_dapm_component() +{ + local tmp_dir="${1}" + local c_dir="${2}" + local c_name="${3}" + local dot_file="${tmp_dir}/main.dot" + local links_file="${tmp_dir}/links.dot" + + if [ -z "${c_name}" ]; then + # Extract directory name into component name: + # "./cs42l51.0-004a/dapm" -> "cs42l51.0-004a" + c_name="$(basename $(dirname "${c_dir}"))" + fi + + dbg_echo " * Component: ${c_name}" + + echo "" >> "${dot_file}" + echo " subgraph \"${c_name}\" {" >> "${dot_file}" + echo " cluster = true" >> "${dot_file}" + echo " label = \"${c_name}\"" >> "${dot_file}" + echo " color=dodgerblue" >> "${dot_file}" + + # Create empty file to ensure it will exist in all cases + >"${links_file}" + + # Iterate over widgets in the component dir + for w_file in ${c_dir}/*; do + process_dapm_widget "${tmp_dir}" "${c_name}" "${w_file}" + done + + echo " }" >> "${dot_file}" + + cat "${links_file}" >> "${dot_file}" +} + +# Parse the DAPM tree for a sound card and generate graph description in +# graphviz dot format +# +# $1 = temporary work dir +# $2 = directory tree with DAPM state (either in debugfs or a mirror) +process_dapm_tree() +{ + local tmp_dir="${1}" + local dapm_dir="${2}" + local dot_file="${tmp_dir}/main.dot" + + echo "digraph G {" > "${dot_file}" + echo " fontname=\"sans-serif\"" >> "${dot_file}" + echo " node [fontname=\"sans-serif\"]" >> "${dot_file}" + + + # Process root directory (no component) + process_dapm_component "${tmp_dir}" "${dapm_dir}/dapm" "ROOT" + + # Iterate over components + for c_dir in "${dapm_dir}"/*/dapm + do + process_dapm_component "${tmp_dir}" "${c_dir}" "" + done + + echo "}" >> "${dot_file}" +} + +main() +{ + # Parse command line + local out_file="dapm.dot" + local card_name="" + local remote_target="" + local dapm_tree="" + local dbg_on="" + while getopts "c:r:d:o:Dh" arg; do + case $arg in + c) card_name="${OPTARG}" ;; + r) remote_target="${OPTARG}" ;; + d) dapm_tree="${OPTARG}" ;; + o) out_file="${OPTARG}" ;; + D) dbg_on="1" ;; + h) usage 0 ;; + *) usage 1 ;; + esac + done + shift $(($OPTIND - 1)) + + if [ -n "${dapm_tree}" ]; then + if [ -n "${card_name}${remote_target}" ]; then + usage 1 "Cannot use -c and -r with -d" + fi + echo "Using local tree: ${dapm_tree}" + elif [ -n "${remote_target}" ]; then + if [ -z "${card_name}" ]; then + usage 1 "-r requires -c" + fi + echo "Using card ${card_name} from remote target ${remote_target}" + elif [ -n "${card_name}" ]; then + echo "Using local card: ${card_name}" + else + usage 1 "Please choose mode using -c, -r or -d" + fi + + # Define logging function + if [ "${dbg_on}" ]; then + dbg_echo() { + echo "$*" >&2 + } + else + dbg_echo() { + : + } + fi + + # Filename must have a dot in order the infer the format from the + # extension + if ! echo "${out_file}" | grep -qE '\.'; then + echo "Missing extension in output filename ${out_file}" >&2 + usage + exit 1 + fi + + local out_fmt="${out_file##*.}" + local dot_file="${out_file%.*}.dot" + + dbg_echo "dot file: $dot_file" + dbg_echo "Output file: $out_file" + dbg_echo "Output format: $out_fmt" + + tmp_dir="$(mktemp -d /tmp/$(basename $0).XXXXXX)" + trap "{ rm -fr ${tmp_dir}; }" INT TERM EXIT + + if [ -z "${dapm_tree}" ] + then + dapm_tree="/sys/kernel/debug/asoc/${card_name}" + fi + if [ -n "${remote_target}" ]; then + dapm_tree="${tmp_dir}/dapm-tree" + grab_remote_files "${remote_target}" "${card_name}" "${tmp_dir}" "${dapm_tree}" + fi + # In all cases now ${dapm_tree} contains the DAPM state + + process_dapm_tree "${tmp_dir}" "${dapm_tree}" + cp "${tmp_dir}/main.dot" "${dot_file}" + + if [ "${out_file}" != "${dot_file}" ]; then + dot -T"${out_fmt}" "${dot_file}" -o "${out_file}" + fi + + echo "Generated file ${out_file}" +} + +main "${@}" diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 35ee41e435ab..6584443144de 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -127,7 +127,7 @@ static struct { #define CXL_TEST_EVENT_CNT_MAX 15 /* Set a number of events to return at a time for simulation. */ -#define CXL_TEST_EVENT_CNT 3 +#define CXL_TEST_EVENT_RET_MAX 4 struct mock_event_log { u16 clear_idx; @@ -222,6 +222,12 @@ static void mes_add_event(struct mock_event_store *mes, log->nr_events++; } +/* + * Vary the number of events returned to simulate events occuring while the + * logs are being read. + */ +static int ret_limit = 0; + static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_get_event_payload *pl; @@ -233,14 +239,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) if (cmd->size_in != sizeof(log_type)) return -EINVAL; - if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT)) + ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX; + if (!ret_limit) + ret_limit = 1; + + if (cmd->size_out < struct_size(pl, records, ret_limit)) return -EINVAL; log_type = *((u8 *)cmd->payload_in); if (log_type >= CXL_EVENT_TYPE_MAX) return -EINVAL; - memset(cmd->payload_out, 0, cmd->size_out); + memset(cmd->payload_out, 0, struct_size(pl, records, 0)); log = event_find_log(dev, log_type); if (!log || event_log_empty(log)) @@ -248,7 +258,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) pl = cmd->payload_out; - for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) { + for (i = 0; i < ret_limit && !event_log_empty(log); i++) { memcpy(&pl->records[i], event_get_current(log), sizeof(pl->records[i])); pl->records[i].event.generic.hdr.handle = @@ -256,6 +266,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) log->cur_idx++; } + cmd->size_out = struct_size(pl, records, i); pl->record_count = cpu_to_le16(i); if (!event_log_empty(log)) pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS; diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index b8419f460368..b438f3d053ee 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -13,6 +13,8 @@ #include <nd-core.h> #include <linux/printk.h> #include <linux/seq_buf.h> +#include <linux/papr_scm.h> +#include <uapi/linux/papr_pdsm.h> #include "../watermark.h" #include "nfit_test.h" @@ -830,12 +832,11 @@ static int ndtest_bus_register(struct ndtest_priv *p) return 0; } -static int ndtest_remove(struct platform_device *pdev) +static void ndtest_remove(struct platform_device *pdev) { struct ndtest_priv *p = to_ndtest_priv(&pdev->dev); nvdimm_bus_unregister(p->bus); - return 0; } static int ndtest_probe(struct platform_device *pdev) @@ -882,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = { static struct platform_driver ndtest_driver = { .probe = ndtest_probe, - .remove = ndtest_remove, + .remove_new = ndtest_remove, .driver = { .name = KBUILD_MODNAME, }, diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h index 2c54c9cbb90c..8f27ad6f7319 100644 --- a/tools/testing/nvdimm/test/ndtest.h +++ b/tools/testing/nvdimm/test/ndtest.h @@ -5,37 +5,6 @@ #include <linux/platform_device.h> #include <linux/libnvdimm.h> -/* SCM device is unable to persist memory contents */ -#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) -/* SCM device failed to persist memory contents */ -#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) -/* SCM device contents are not persisted from previous IPL */ -#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) -#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) -/* SCM device will be garded off next IPL due to failure */ -#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) -/* SCM contents cannot persist due to current platform health status */ -#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) - -/* Bits status indicators for health bitmap indicating unarmed dimm */ -#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10)) - -/* Bits status indicators for health bitmap indicating unflushed dimm */ -#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) - -/* Bits status indicators for health bitmap indicating unrestored dimm */ -#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) - -/* Bit status indicators for smart event notification */ -#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ - PAPR_PMEM_HEALTH_FATAL | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED) - struct ndtest_config; struct ndtest_priv { diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index 89e3656a042d..e2b3a5810f47 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -105,7 +105,7 @@ static struct card_cfg_data *conf_data_by_card(int card, bool msg) return NULL; } -static int dump_config_tree(snd_config_t *top) +static void dump_config_tree(snd_config_t *top) { snd_output_t *out; int err; diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 00b441928909..16461dc0ffdf 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall -pthread all: ${HELPER_PROGS} TEST_FILES := with_stress.sh -TEST_PROGS := test_stress.sh test_cpuset_prs.sh +TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh TEST_GEN_FILES := wait_inotify TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_kmem diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 0340d4ca8f51..ce16a50ecff8 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ - -#define _GNU_SOURCE - #include <errno.h> #include <fcntl.h> #include <linux/limits.h> @@ -195,10 +192,10 @@ int cg_write_numeric(const char *cgroup, const char *control, long value) return cg_write(cgroup, control, buf); } -int cg_find_unified_root(char *root, size_t len) +int cg_find_unified_root(char *root, size_t len, bool *nsdelegate) { char buf[10 * PAGE_SIZE]; - char *fs, *mount, *type; + char *fs, *mount, *type, *options; const char delim[] = "\n\t "; if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) @@ -211,12 +208,14 @@ int cg_find_unified_root(char *root, size_t len) for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { mount = strtok(NULL, delim); type = strtok(NULL, delim); - strtok(NULL, delim); + options = strtok(NULL, delim); strtok(NULL, delim); strtok(NULL, delim); if (strcmp(type, "cgroup2") == 0) { strncpy(root, mount, len); + if (nsdelegate) + *nsdelegate = !!strstr(options, "nsdelegate"); return 0; } } diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 1df7f202214a..e8d04ac9e3d2 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -18,10 +18,10 @@ */ static inline int values_close(long a, long b, int err) { - return abs(a - b) <= (a + b) / 100 * err; + return labs(a - b) <= (a + b) / 100 * err; } -extern int cg_find_unified_root(char *root, size_t len); +extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate); extern char *cg_name(const char *root, const char *name); extern char *cg_name_indexed(const char *root, const char *name, int index); extern char *cg_control(const char *cgroup, const char *control); diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 80aa6b2373b9..de8baad46022 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,6 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ - -#define _GNU_SOURCE #include <linux/limits.h> #include <linux/sched.h> #include <sys/types.h> @@ -18,6 +16,8 @@ #include "../kselftest.h" #include "cgroup_util.h" +static bool nsdelegate; + static int touch_anon(char *buf, size_t size) { int fd; @@ -775,6 +775,9 @@ static int test_cgcore_lesser_ns_open(const char *root) pid_t pid; int status; + if (!nsdelegate) + return KSFT_SKIP; + cg_test_a = cg_name(root, "cg_test_a"); cg_test_b = cg_name(root, "cg_test_b"); @@ -862,7 +865,7 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), &nsdelegate)) ksft_exit_skip("cgroup v2 isn't mounted\n"); if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 24020a2c68dc..5a4a314f6af7 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 - -#define _GNU_SOURCE #include <linux/limits.h> #include <sys/sysinfo.h> #include <sys/wait.h> @@ -237,7 +235,7 @@ run_cpucg_weight_test( { int ret = KSFT_FAIL, i; char *parent = NULL; - struct cpu_hogger children[3] = {NULL}; + struct cpu_hogger children[3] = {}; parent = cg_name(root, "cpucg_test_0"); if (!parent) @@ -408,7 +406,7 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned) { int ret = KSFT_FAIL, i; char *parent = NULL, *child = NULL; - struct cpu_hogger leaf[3] = {NULL}; + struct cpu_hogger leaf[3] = {}; long nested_leaf_usage, child_usage; int nprocs = get_nprocs(); @@ -700,7 +698,7 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); if (cg_read_strstr(root, "cgroup.subtree_control", "cpu")) diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c index b061ed1e05b4..4034d14ba69a 100644 --- a/tools/testing/selftests/cgroup/test_cpuset.c +++ b/tools/testing/selftests/cgroup/test_cpuset.c @@ -249,7 +249,7 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset")) diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh new file mode 100755 index 000000000000..3f45512fb512 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Test the special cpuset v1 hotplug case where a cpuset become empty of +# CPUs will force migration of tasks out to an ancestor. +# + +skip_test() { + echo "$1" + echo "Test SKIPPED" + exit 4 # ksft_skip +} + +[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" + +# Find cpuset v1 mount point +CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk -e '{print $3}') +[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!" + +# +# Create a test cpuset, put a CPU and a task there and offline that CPU +# +TDIR=test$$ +[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR +echo 1 > $CPUSET/$TDIR/cpuset.cpus +echo 0 > $CPUSET/$TDIR/cpuset.mems +sleep 10& +TASK=$! +echo $TASK > $CPUSET/$TDIR/tasks +NEWCS=$(cat /proc/$TASK/cpuset) +[[ $NEWCS != "/$TDIR" ]] && { + echo "Unexpected cpuset $NEWCS, test FAILED!" + exit 1 +} + +echo 0 > /sys/devices/system/cpu/cpu1/online +sleep 0.5 +echo 1 > /sys/devices/system/cpu/cpu1/online +NEWCS=$(cat /proc/$TASK/cpuset) +rmdir $CPUSET/$TDIR +[[ $NEWCS != "/" ]] && { + echo "cpuset $NEWCS, test FAILED!" + exit 1 +} +echo "Test PASSED" +exit 0 diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8845353aca53..8730645d363a 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -827,7 +827,7 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c index f0fefeb4cc24..80d05d50a42d 100644 --- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c +++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <linux/limits.h> #include <sys/mman.h> #include <stdio.h> @@ -214,7 +212,7 @@ int main(int argc, char **argv) return ret; } - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); switch (test_hugetlb_memcg(root)) { diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c index 6153690319c9..0e5bb6c7307a 100644 --- a/tools/testing/selftests/cgroup/test_kill.c +++ b/tools/testing/selftests/cgroup/test_kill.c @@ -276,7 +276,7 @@ int main(int argc, char *argv[]) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index c82f974b85c9..2e453ac50c0d 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <linux/limits.h> #include <fcntl.h> #include <stdio.h> @@ -192,7 +190,7 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; sum = anon + file + kernel + sock; - if (abs(sum - current) < MAX_VMSTAT_ERROR) { + if (labs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); @@ -380,7 +378,7 @@ static int test_percpu_basic(const char *root) current = cg_read_long(parent, "memory.current"); percpu = cg_read_key_long(parent, "memory.stat", "percpu "); - if (current > 0 && percpu > 0 && abs(current - percpu) < + if (current > 0 && percpu > 0 && labs(current - percpu) < MAX_VMSTAT_ERROR) ret = KSFT_PASS; else @@ -420,7 +418,7 @@ int main(int argc, char **argv) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); /* diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index c7c9572003a8..c871630d62a3 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1,6 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#define _GNU_SOURCE - #include <linux/limits.h> #include <linux/oom.h> #include <fcntl.h> @@ -716,7 +714,9 @@ static bool reclaim_until(const char *memcg, long goal) */ static int test_memcg_reclaim(const char *root) { - int ret = KSFT_FAIL, fd, retries; + int ret = KSFT_FAIL; + int fd = -1; + int retries; char *memcg; long current, expected_usage; @@ -1314,7 +1314,7 @@ int main(int argc, char **argv) char root[PATH_MAX]; int i, proc_status, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); /* diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index f0e488ed90d8..8418a8d7439f 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <linux/limits.h> #include <unistd.h> #include <stdio.h> @@ -257,7 +255,7 @@ static int test_no_invasive_cgroup_shrink(const char *root) { int ret = KSFT_FAIL; size_t control_allocation_size = MB(10); - char *control_allocation, *wb_group = NULL, *control_group = NULL; + char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL; wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); if (!wb_group) @@ -342,7 +340,7 @@ static int test_no_kmem_bypass(const char *root) struct sysinfo sys_info; int ret = KSFT_FAIL; int child_status; - char *test_group; + char *test_group = NULL; pid_t child_pid; /* Read sys info and compute test values accordingly */ @@ -440,7 +438,7 @@ int main(int argc, char **argv) char root[PATH_MAX]; int i, ret = EXIT_SUCCESS; - if (cg_find_unified_root(root, sizeof(root))) + if (cg_find_unified_root(root, sizeof(root), NULL)) ksft_exit_skip("cgroup v2 isn't mounted\n"); if (!zswap_configured()) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc new file mode 100644 index 000000000000..c6a9d2466a71 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc @@ -0,0 +1,41 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Fprobe event VFS type argument +# requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + + +: "Test argument %pd with name for fprobe" +echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pd without name for fprobe" +echo 'f:testprobe dput $arg1:%pd' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pD with name for fprobe" +echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pD without name for fprobe" +echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc new file mode 100644 index 000000000000..21a54be6894c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event VFS type argument +# requires: kprobe_events "%pd/%pD":README + +: "Test argument %pd with name" +echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pd without name" +echo 'p:testprobe dput $arg1:%pd' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD with name" +echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD without name" +echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 741c7dc16afc..ce8ff8e8ce3a 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -45,6 +45,7 @@ LIBKVM_x86_64 += lib/x86_64/vmx.c LIBKVM_aarch64 += lib/aarch64/gic.c LIBKVM_aarch64 += lib/aarch64/gic_v3.c +LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c LIBKVM_aarch64 += lib/aarch64/handlers.S LIBKVM_aarch64 += lib/aarch64/processor.c LIBKVM_aarch64 += lib/aarch64/spinlock.c @@ -120,6 +121,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test TEST_GEN_PROGS_x86_64 += x86_64/amx_test @@ -157,6 +159,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer @@ -189,6 +192,8 @@ TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test @@ -225,8 +230,8 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ - -fno-builtin-strnlen \ + -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 4eaba83cdcf3..eeba1cc87ff8 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -5,18 +5,14 @@ * * Copyright (c) 2021, Google LLC. */ -#define _GNU_SOURCE - #include "arch_timer.h" #include "delay.h" #include "gic.h" #include "processor.h" #include "timer_test.h" +#include "ucall_common.h" #include "vgic.h" -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - enum guest_stage { GUEST_STAGE_VTIMER_CVAL = 1, GUEST_STAGE_VTIMER_TVAL, @@ -149,8 +145,7 @@ static void guest_code(void) local_irq_disable(); - gic_init(GIC_V3, test_args.nr_vcpus, - (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA); + gic_init(GIC_V3, test_args.nr_vcpus); timer_set_ctl(VIRTUAL, CTL_IMASK); timer_set_ctl(PHYSICAL, CTL_IMASK); @@ -209,7 +204,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 5972905275cf..d29b08198b42 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -7,7 +7,6 @@ * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag). */ -#define _GNU_SOURCE #include <linux/bitmap.h> #include <fcntl.h> #include <test_util.h> @@ -375,14 +374,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p, *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, pt_args.hva, pt_args.paging_size, - test->uffd_pt_handler); + 1, test->uffd_pt_handler); *data_uffd = NULL; if (test->uffd_data_handler) *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, data_args.hva, data_args.paging_size, - test->uffd_data_handler); + 1, test->uffd_data_handler); } static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 9b004905d1d3..61731a950def 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -11,9 +11,9 @@ * KVM_SYSTEM_EVENT_SUSPEND UAPI. */ -#define _GNU_SOURCE - +#include <linux/kernel.h> #include <linux/psci.h> +#include <asm/cputype.h> #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index 16e2338686c1..a7de39fa2a0a 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -327,8 +327,8 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) return ftr; } -static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) +static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) { uint8_t shift = ftr_bits->shift; uint64_t mask = ftr_bits->mask; @@ -346,6 +346,8 @@ static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, vcpu_set_reg(vcpu, reg, val); vcpu_get_reg(vcpu, reg, &new_val); TEST_ASSERT_EQ(new_val, val); + + return new_val; } static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, @@ -374,7 +376,15 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT_EQ(val, old_val); } -static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) +static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + +#define encoding_to_range_idx(encoding) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ + sys_reg_CRn(encoding), sys_reg_CRm(encoding), \ + sys_reg_Op2(encoding)) + + +static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) { uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { @@ -398,9 +408,7 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) int idx; /* Get the index to masks array for the idreg */ - idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id), - sys_reg_CRn(reg_id), sys_reg_CRm(reg_id), - sys_reg_Op2(reg_id)); + idx = encoding_to_range_idx(reg_id); for (int j = 0; ftr_bits[j].type != FTR_END; j++) { /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ @@ -414,7 +422,9 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); test_reg_set_fail(vcpu, reg, &ftr_bits[j]); - test_reg_set_success(vcpu, reg, &ftr_bits[j]); + + test_reg_vals[idx] = test_reg_set_success(vcpu, reg, + &ftr_bits[j]); ksft_test_result_pass("%s\n", ftr_bits[j].name); } @@ -425,7 +435,6 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; struct ucall uc; - uint64_t val; while (!done) { vcpu_run(vcpu); @@ -436,8 +445,8 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) break; case UCALL_SYNC: /* Make sure the written values are seen by guest */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val); - TEST_ASSERT_EQ(val, uc.args[3]); + TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], + uc.args[3]); break; case UCALL_DONE: done = true; @@ -448,13 +457,85 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) } } +/* Politely lifted from arch/arm64/include/asm/cache.h */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static void test_clidr(struct kvm_vcpu *vcpu) +{ + uint64_t clidr; + int level; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + + /* find the first empty level in the cache hierarchy */ + for (level = 1; level < 7; level++) { + if (!CLIDR_CTYPE(clidr, level)) + break; + } + + /* + * If you have a mind-boggling 7 levels of cache, congratulations, you + * get to fix this. + */ + TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy"); + + /* stick in a unified cache level */ + clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level); + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr); + test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + u64 val; + + test_clidr(vcpu); + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + + test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; + ksft_test_result_pass("%s\n", __func__); +} + +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +{ + size_t idx = encoding_to_range_idx(encoding); + uint64_t observed; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + TEST_ASSERT_EQ(test_reg_vals[idx], observed); +} + +static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) +{ + /* + * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an + * architectural reset of the vCPU. + */ + aarch64_vcpu_setup(vcpu, NULL); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) + test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + + test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + + ksft_test_result_pass("%s\n", __func__); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; uint64_t val, el0; - int ftr_cnt; + int test_cnt; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); @@ -467,18 +548,22 @@ int main(void) ksft_print_header(); - ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs); + test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 2; - ksft_set_plan(ftr_cnt); + ksft_set_plan(test_cnt); + + test_vm_ftr_id_regs(vcpu, aarch64_only); + test_vcpu_ftr_id_regs(vcpu); - test_user_set_reg(vcpu, aarch64_only); test_guest_reg_read(vcpu); + test_reset_preserves_id_regs(vcpu); + kvm_vm_free(vm); ksft_finished(); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index ca917c71ff60..b3b5fb0ff0a9 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -4,7 +4,6 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <linux/kernel.h> #include <sys/syscall.h> #include <asm/kvm.h> diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 2e64b4856e38..a51dbd2a5f84 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -19,9 +19,6 @@ #include "gic_v3.h" #include "vgic.h" -#define GICD_BASE_GPA 0x08000000ULL -#define GICR_BASE_GPA 0x080A0000ULL - /* * Stores the user specified args; it's passed to the guest and to every test * function. @@ -49,9 +46,6 @@ struct test_args { #define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) #define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ -static void *dist = (void *)GICD_BASE_GPA; -static void *redist = (void *)GICR_BASE_GPA; - /* * The kvm_inject_* utilities are used by the guest to ask the host to inject * interrupts (e.g., using the KVM_IRQ_LINE ioctl). @@ -152,7 +146,7 @@ static void reset_stats(void) static uint64_t gic_read_ap1r0(void) { - uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1); + uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); dsb(sy); return reg; @@ -160,7 +154,7 @@ static uint64_t gic_read_ap1r0(void) static void gic_write_ap1r0(uint64_t val) { - write_sysreg_s(val, SYS_ICV_AP1R0_EL1); + write_sysreg_s(val, SYS_ICC_AP1R0_EL1); isb(); } @@ -478,7 +472,7 @@ static void guest_code(struct test_args *args) bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; - gic_init(GIC_V3, 1, dist, redist); + gic_init(GIC_V3, 1); for (i = 0; i < nr_irqs; i++) gic_irq_enable(i); @@ -764,8 +758,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpu, 1, args_gva); - gic_fd = vgic_v3_setup(vm, 1, nr_irqs, - GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, 1, nr_irqs); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c new file mode 100644 index 000000000000..fc4fe52fb6f8 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_lpi_stress - Stress test for KVM's ITS emulation + * + * Copyright (c) 2024 Google LLC + */ + +#include <linux/sizes.h> +#include <pthread.h> +#include <stdatomic.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "gic_v3_its.h" +#include "processor.h" +#include "ucall.h" +#include "vgic.h" + +#define TEST_MEMSLOT_INDEX 1 + +#define GIC_LPI_OFFSET 8192 + +static size_t nr_iterations = 1000; +static vm_paddr_t gpa_base; + +static struct kvm_vm *vm; +static struct kvm_vcpu **vcpus; +static int gic_fd, its_fd; + +static struct test_data { + bool request_vcpus_stop; + u32 nr_cpus; + u32 nr_devices; + u32 nr_event_ids; + + vm_paddr_t device_table; + vm_paddr_t collection_table; + vm_paddr_t cmdq_base; + void *cmdq_base_va; + vm_paddr_t itt_tables; + + vm_paddr_t lpi_prop_table; + vm_paddr_t lpi_pend_tables; +} test_data = { + .nr_cpus = 1, + .nr_devices = 1, + .nr_event_ids = 16, +}; + +static void guest_irq_handler(struct ex_regs *regs) +{ + u32 intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(intid >= GIC_LPI_OFFSET); + gic_set_eoi(intid); +} + +static void guest_setup_its_mappings(void) +{ + u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET; + u32 nr_events = test_data.nr_event_ids; + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + + for (coll_id = 0; coll_id < nr_cpus; coll_id++) + its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true); + + /* Round-robin the LPIs to all of the vCPUs in the VM */ + coll_id = 0; + for (device_id = 0; device_id < nr_devices; device_id++) { + vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + + its_send_mapd_cmd(test_data.cmdq_base_va, device_id, + itt_base, SZ_64K, true); + + for (event_id = 0; event_id < nr_events; event_id++) { + its_send_mapti_cmd(test_data.cmdq_base_va, device_id, + event_id, coll_id, intid++); + + coll_id = (coll_id + 1) % test_data.nr_cpus; + } + } +} + +static void guest_invalidate_all_rdists(void) +{ + int i; + + for (i = 0; i < test_data.nr_cpus; i++) + its_send_invall_cmd(test_data.cmdq_base_va, i); +} + +static void guest_setup_gic(void) +{ + static atomic_int nr_cpus_ready = 0; + u32 cpuid = guest_get_vcpuid(); + + gic_init(GIC_V3, test_data.nr_cpus); + gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K, + test_data.lpi_pend_tables + (cpuid * SZ_64K)); + + atomic_fetch_add(&nr_cpus_ready, 1); + + if (cpuid > 0) + return; + + while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus) + cpu_relax(); + + its_init(test_data.collection_table, SZ_64K, + test_data.device_table, SZ_64K, + test_data.cmdq_base, SZ_64K); + + guest_setup_its_mappings(); + guest_invalidate_all_rdists(); +} + +static void guest_code(size_t nr_lpis) +{ + guest_setup_gic(); + + GUEST_SYNC(0); + + /* + * Don't use WFI here to avoid blocking the vCPU thread indefinitely and + * never getting the stop signal. + */ + while (!READ_ONCE(test_data.request_vcpus_stop)) + cpu_relax(); + + GUEST_DONE(); +} + +static void setup_memslot(void) +{ + size_t pages; + size_t sz; + + /* + * For the ITS: + * - A single level device table + * - A single level collection table + * - The command queue + * - An ITT for each device + */ + sz = (3 + test_data.nr_devices) * SZ_64K; + + /* + * For the redistributors: + * - A shared LPI configuration table + * - An LPI pending table for each vCPU + */ + sz += (1 + test_data.nr_cpus) * SZ_64K; + + pages = sz / vm->page_size; + gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz; + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base, + TEST_MEMSLOT_INDEX, pages, 0); +} + +#define LPI_PROP_DEFAULT_PRIO 0xa0 + +static void configure_lpis(void) +{ + size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids; + u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table); + size_t i; + + for (i = 0; i < nr_lpis; i++) { + tbl[i] = LPI_PROP_DEFAULT_PRIO | + LPI_PROP_GROUP1 | + LPI_PROP_ENABLED; + } +} + +static void setup_test_data(void) +{ + size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + vm_paddr_t cmdq_base; + + test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, + TEST_MEMSLOT_INDEX); + virt_map(vm, cmdq_base, cmdq_base, pages_per_64k); + test_data.cmdq_base = cmdq_base; + test_data.cmdq_base_va = (void *)cmdq_base; + + test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices, + gpa_base, TEST_MEMSLOT_INDEX); + + test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, TEST_MEMSLOT_INDEX); + configure_lpis(); + + test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus, + gpa_base, TEST_MEMSLOT_INDEX); + + sync_global_to_guest(vm, test_data); +} + +static void setup_gic(void) +{ + gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); + + its_fd = vgic_its_setup(vm); +} + +static void signal_lpi(u32 device_id, u32 event_id) +{ + vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + + struct kvm_msi msi = { + .address_lo = db_addr, + .address_hi = db_addr >> 32, + .data = event_id, + .devid = device_id, + .flags = KVM_MSI_VALID_DEVID, + }; + + /* + * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM, + * which for arm64 implies having a valid translation in the ITS. + */ + TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1, + "KVM_SIGNAL_MSI ioctl failed"); +} + +static pthread_barrier_t test_setup_barrier; + +static void *lpi_worker_thread(void *data) +{ + u32 device_id = (size_t)data; + u32 event_id; + size_t i; + + pthread_barrier_wait(&test_setup_barrier); + + for (i = 0; i < nr_iterations; i++) + for (event_id = 0; event_id < test_data.nr_event_ids; event_id++) + signal_lpi(device_id, event_id); + + return NULL; +} + +static void *vcpu_worker_thread(void *data) +{ + struct kvm_vcpu *vcpu = data; + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + pthread_barrier_wait(&test_setup_barrier); + continue; + case UCALL_DONE: + return NULL; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall: %lu", uc.cmd); + } + } + + return NULL; +} + +static void report_stats(struct timespec delta) +{ + double nr_lpis; + double time; + + nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations; + + time = delta.tv_sec; + time += ((double)delta.tv_nsec) / NSEC_PER_SEC; + + pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time); +} + +static void run_test(void) +{ + u32 nr_devices = test_data.nr_devices; + u32 nr_vcpus = test_data.nr_cpus; + pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t)); + pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t)); + struct timespec start, delta; + size_t i; + + TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays"); + + pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1); + + for (i = 0; i < nr_vcpus; i++) + pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]); + + for (i = 0; i < nr_devices; i++) + pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i); + + pthread_barrier_wait(&test_setup_barrier); + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < nr_devices; i++) + pthread_join(lpi_threads[i], NULL); + + delta = timespec_elapsed(start); + write_guest_global(vm, test_data.request_vcpus_stop, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i], NULL); + + report_stats(delta); +} + +static void setup_vm(void) +{ + int i; + + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + for (i = 0; i < test_data.nr_cpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + setup_memslot(); + + setup_gic(); + + setup_test_data(); +} + +static void destroy_vm(void) +{ + close(its_fd); + close(gic_fd); + kvm_vm_free(vm); + free(vcpus); +} + +static void pr_usage(const char *name) +{ + pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name); + pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus); + pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices); + pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids); + pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations); +} + +int main(int argc, char **argv) +{ + u32 nr_threads; + int c; + + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { + switch (c) { + case 'v': + test_data.nr_cpus = atoi(optarg); + break; + case 'd': + test_data.nr_devices = atoi(optarg); + break; + case 'e': + test_data.nr_event_ids = atoi(optarg); + break; + case 'i': + nr_iterations = strtoul(optarg, NULL, 0); + break; + case 'h': + default: + pr_usage(argv[0]); + return 1; + } + } + + nr_threads = test_data.nr_cpus + test_data.nr_devices; + if (nr_threads > get_nprocs()) + pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n", + nr_threads, get_nprocs()); + + setup_vm(); + + run_test(); + + destroy_vm(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index f2fb0e3f14bc..d31b9f64ba14 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -404,9 +404,6 @@ static void guest_code(uint64_t expected_pmcr_n) GUEST_DONE(); } -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - /* Create a VM that has one vCPU with PMUv3 configured. */ static void create_vpmu_vm(void *guest_code) { @@ -438,8 +435,7 @@ static void create_vpmu_vm(void *guest_code) init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64, - GICD_BASE_GPA, GICR_BASE_GPA); + vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, "Failed to create vgic-v3, skipping"); diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index ae1f1a6d8312..acb2cb596332 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -19,9 +19,6 @@ * * Copyright (c) 2021, Google LLC. */ - -#define _GNU_SOURCE - #include <stdlib.h> #include <pthread.h> #include <linux/sizes.h> @@ -29,6 +26,7 @@ #include <sys/sysinfo.h> #include "timer_test.h" +#include "ucall_common.h" struct test_args test_args = { .nr_vcpus = NR_VCPUS_DEF, diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index bf3609f71854..0202b78f8680 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -6,14 +6,10 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019, Google, Inc. */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <time.h> -#include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> #include <sys/syscall.h> @@ -22,6 +18,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #include "userfaultfd_util.h" #ifdef __NR_userfaultfd @@ -77,8 +74,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, copy.mode = 0; r = ioctl(uffd, UFFDIO_COPY, ©); - if (r == -1) { - pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -89,8 +98,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, cont.range.len = demand_paging_size; r = ioctl(uffd, UFFDIO_CONTINUE, &cont); - if (r == -1) { - pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -110,7 +131,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, struct test_params { int uffd_mode; + bool single_uffd; useconds_t uffd_delay; + int readers_per_uffd; enum vm_mem_backing_src_type src_type; bool partition_vcpu_memory_access; }; @@ -131,10 +154,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; + uint64_t uffd_region_size; struct timespec start; struct timespec ts_diff; + double vcpu_paging_rate; struct kvm_vm *vm; - int i; + int i, num_uffds = 0; vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -147,7 +172,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { - for (i = 0; i < nr_vcpus; i++) { + num_uffds = p->single_uffd ? 1 : nr_vcpus; + for (i = 0; i < num_uffds; i++) { vcpu_args = &memstress_args.vcpu_args[i]; prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), vcpu_args->pages * memstress_args.guest_page_size); @@ -155,9 +181,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) } if (p->uffd_mode) { - uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + num_uffds = p->single_uffd ? 1 : nr_vcpus; + uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds; + + uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *)); TEST_ASSERT(uffd_descs, "Memory allocation failed"); - for (i = 0; i < nr_vcpus; i++) { + for (i = 0; i < num_uffds; i++) { + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; vcpu_args = &memstress_args.vcpu_args[i]; @@ -170,7 +200,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) */ uffd_descs[i] = uffd_setup_demand_paging( p->uffd_mode, p->uffd_delay, vcpu_hva, - vcpu_args->pages * memstress_args.guest_page_size, + uffd_region_size, + p->readers_per_uffd, &handle_uffd_page_request); } } @@ -187,15 +218,19 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (p->uffd_mode) { /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) + for (i = 0; i < num_uffds; i++) uffd_stop_demand_paging(uffd_descs[i]); } - pr_info("Total guest execution time: %ld.%.9lds\n", + pr_info("Total guest execution time:\t%ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - pr_info("Overall demand paging rate: %f pgs/sec\n", - memstress_args.vcpu_args[0].pages * nr_vcpus / - ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); + + vcpu_paging_rate = memstress_args.vcpu_args[0].pages / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC); + pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n", + vcpu_paging_rate); + pr_info("Overall demand paging rate:\t%f pgs/sec\n", + vcpu_paging_rate * nr_vcpus); memstress_destroy_vm(vm); @@ -207,15 +242,20 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" - " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n" + " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n" + " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); guest_modes_help(); printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); kvm_print_vcpu_pinning_help(); + printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n" + " creating one for each region paged by a unique vCPU\n" + " Set implicitly with -o, and no effect without -u.\n"); printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); + printf(" -r: Set the number of reader threads per uffd.\n"); printf(" -b: specify the size of the memory region which should be\n" " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -234,12 +274,14 @@ int main(int argc, char *argv[]) struct test_params p = { .src_type = DEFAULT_VM_MEM_SRC, .partition_vcpu_memory_access = true, + .readers_per_uffd = 1, + .single_uffd = false, }; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { + while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -251,6 +293,9 @@ int main(int argc, char *argv[]) p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); break; + case 'a': + p.single_uffd = true; + break; case 'd': p.uffd_delay = strtoul(optarg, NULL, 0); TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); @@ -271,6 +316,13 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + p.single_uffd = true; + break; + case 'r': + p.readers_per_uffd = atoi(optarg); + TEST_ASSERT(p.readers_per_uffd >= 1, + "Invalid number of readers per uffd %d: must be >=1", + p.readers_per_uffd); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 504f6fe980e8..9f24303acb8c 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,13 +18,11 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #ifdef __aarch64__ #include "aarch64/vgic.h" -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - static int gic_fd; static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) @@ -33,7 +31,7 @@ static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) * The test can still run even if hardware does not support GICv3, as it * is only an optimization to reduce guest exits. */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); } static void arch_cleanup_vm(struct kvm_vm *vm) @@ -132,7 +130,6 @@ struct test_params { enum vm_mem_backing_src_type backing_src; int slots; uint32_t write_percent; - uint32_t random_seed; bool random_access; }; @@ -156,8 +153,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) p->slots, p->backing_src, p->partition_vcpu_memory_access); - pr_info("Random seed: %u\n", p->random_seed); - memstress_set_random_seed(vm, p->random_seed); memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; @@ -346,11 +341,13 @@ int main(int argc, char *argv[]) .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, - .random_seed = 1, .write_percent = 100, }; int opt; + /* Override the seed to be deterministic by default. */ + guest_random_seed = 1; + dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | @@ -395,7 +392,7 @@ int main(int argc, char *argv[]) p.phys_offset = strtoull(optarg, NULL, 0); break; case 'r': - p.random_seed = atoi_positive("Random seed", optarg); + guest_random_seed = atoi_positive("Random seed", optarg); break; case 's': p.backing_src = parse_backing_src_type(optarg); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index eaad5b20854c..aacf80f57439 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <pthread.h> @@ -23,6 +20,7 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 @@ -76,7 +74,6 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; -static uint64_t random_array[TEST_PAGES_PER_LOOP]; static uint64_t iteration; /* @@ -109,19 +106,19 @@ static void guest_code(void) */ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; - *(uint64_t *)addr = READ_ONCE(iteration); + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); } while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { addr = guest_test_virt_mem; - addr += (READ_ONCE(random_array[i]) % guest_num_pages) + addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); - *(uint64_t *)addr = READ_ONCE(iteration); + + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); } - /* Tell the host that we need more random numbers */ GUEST_SYNC(1); } } @@ -508,20 +505,10 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void generate_random_array(uint64_t *guest_array, uint64_t size) -{ - uint64_t i; - - for (i = 0; i < size; i++) - guest_array[i] = random(); -} - static void *vcpu_worker(void *data) { int ret; struct kvm_vcpu *vcpu = data; - struct kvm_vm *vm = vcpu->vm; - uint64_t *guest_array; uint64_t pages_count = 0; struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) + sizeof(sigset_t)); @@ -540,11 +527,8 @@ static void *vcpu_worker(void *data) sigemptyset(sigset); sigaddset(sigset, SIG_IPI); - guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); - while (!READ_ONCE(host_quit)) { /* Clear any existing kick signals */ - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = __vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 92eae206baa6..ba0c8e996035 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -4,8 +4,6 @@ * * Author: Chao Peng <chao.p.peng@linux.intel.com> */ - -#define _GNU_SOURCE #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -19,8 +17,8 @@ #include <sys/types.h> #include <sys/stat.h> +#include "kvm_util.h" #include "test_util.h" -#include "kvm_util_base.h" static void test_file_read_write(int fd) { diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 3502caa3590c..8092c2d0f5d6 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -13,6 +13,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" struct guest_vals { uint64_t a; diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index decc521fc760..bce73bcb973c 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -4,9 +4,6 @@ * kvm_arch_hardware_disable is called and it attempts to unregister the user * return notifiers. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <pthread.h> #include <semaphore.h> diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h index b217ea17cac5..baeb3c859389 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic.h @@ -6,11 +6,26 @@ #ifndef SELFTEST_KVM_GIC_H #define SELFTEST_KVM_GIC_H +#include <asm/kvm.h> + enum gic_type { GIC_V3, GIC_TYPE_MAX, }; +/* + * Note that the redistributor frames are at the end, as the range scales + * with the number of vCPUs in the VM. + */ +#define GITS_BASE_GPA 0x8000000ULL +#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE) +#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE) + +/* The GIC is identity-mapped into the guest at the time of setup. */ +#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA) +#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA) +#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA) + #define MIN_SGI 0 #define MIN_PPI 16 #define MIN_SPI 32 @@ -21,8 +36,7 @@ enum gic_type { #define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) #define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base); +void gic_init(enum gic_type type, unsigned int nr_cpus); void gic_irq_enable(unsigned int intid); void gic_irq_disable(unsigned int intid); unsigned int gic_get_and_ack_irq(void); @@ -44,4 +58,7 @@ void gic_irq_clear_pending(unsigned int intid); bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table); + #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h index ba0886e8a2bb..a76615fa39a1 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h @@ -1,82 +1,604 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * ARM Generic Interrupt Controller (GIC) v3 specific defines + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier <marc.zyngier@arm.com> */ - -#ifndef SELFTEST_KVM_GICV3_H -#define SELFTEST_KVM_GICV3_H - -#include <asm/sysreg.h> +#ifndef __SELFTESTS_GIC_V3_H +#define __SELFTESTS_GIC_V3_H /* - * Distributor registers + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. */ #define GICD_CTLR 0x0000 #define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_TYPER2 0x000C +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 #define GICD_IGROUPR 0x0080 #define GICD_ISENABLER 0x0100 #define GICD_ICENABLER 0x0180 #define GICD_ISPENDR 0x0200 #define GICD_ICPENDR 0x0280 -#define GICD_ICACTIVER 0x0380 #define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 #define GICD_IPRIORITYR 0x0400 #define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IGROUPRnE 0x1000 +#define GICD_ISENABLERnE 0x1200 +#define GICD_ICENABLERnE 0x1400 +#define GICD_ISPENDRnE 0x1600 +#define GICD_ICPENDRnE 0x1800 +#define GICD_ISACTIVERnE 0x1A00 +#define GICD_ICACTIVERnE 0x1C00 +#define GICD_IPRIORITYRnE 0x2000 +#define GICD_ICFGRnE 0x3000 +#define GICD_IROUTER 0x6000 +#define GICD_IROUTERnE 0x8000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +#define ESPI_BASE_INTID 4096 /* - * The assumption is that the guest runs in a non-secure mode. - * The following bits of GICD_CTLR are defined accordingly. + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + #define GICD_CTLR_RWP (1U << 31) #define GICD_CTLR_nASSGIreq (1U << 8) +#define GICD_CTLR_DS (1U << 6) #define GICD_CTLR_ARE_NS (1U << 4) #define GICD_CTLR_ENABLE_G1A (1U << 1) #define GICD_CTLR_ENABLE_G1 (1U << 0) +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_RSS (1U << 26) +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) +#define GICD_TYPER_ESPI (1U << 8) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) #define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0 +#define GICD_TYPER_ESPIS(typer) \ + (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) + +#define GICD_TYPER2_nASSGIcap (1U << 8) +#define GICD_TYPER2_VIL (1U << 7) +#define GICD_TYPER2_VID GENMASK(4, 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +#define GIC_PAGE_SIZE_4K 0ULL +#define GIC_PAGE_SIZE_16K 1ULL +#define GIC_PAGE_SIZE_64K 2ULL +#define GIC_PAGE_SIZE_MASK 3ULL /* - * Redistributor registers + * Re-Distributor registers, offsets from RD_base */ -#define GICR_CTLR 0x000 -#define GICR_WAKER 0x014 +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_CES (1UL << 1) +#define GICR_CTLR_IR (1UL << 2) +#define GICR_CTLR_RWP (1UL << 3) -#define GICR_CTLR_RWP (1U << 3) +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define EPPI_BASE_INTID 1056 + +#define GICR_TYPER_NR_PPIS(r) \ + ({ \ + unsigned int __ppinum = ((r) >> 27) & 0x1f; \ + unsigned int __nr_ppis = 16; \ + if (__ppinum == 1 || __ppinum == 2) \ + __nr_ppis += __ppinum * 32; \ + \ + __nr_ppis; \ + }) #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) + /* - * Redistributor registers, offsets from SGI base + * Re-Distributor registers, offsets from SGI_base */ #define GICR_IGROUPR0 GICD_IGROUPR #define GICR_ISENABLER0 GICD_ISENABLER #define GICR_ICENABLER0 GICD_ICENABLER #define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR #define GICR_ISACTIVER0 GICD_ISACTIVER #define GICR_ICACTIVER0 GICD_ICACTIVER -#define GICR_ICENABLER GICD_ICENABLER -#define GICR_ICACTIVER GICD_ICACTIVER #define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) +#define GICR_TYPER_DirectLPIS (1U << 3) +#define GICR_TYPER_LAST (1U << 4) +#define GICR_TYPER_RVPEID (1U << 7) +#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) +#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) + +#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) +#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) +#define GICR_INVLPIR_V GENMASK_ULL(63, 63) + +#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID +#define GICR_INVALLR_V GICR_INVLPIR_V + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +/* + * GICv4.1 VPROPBASER reinvention. A subtle mix between the old + * VPROPBASER and ITS_BASER. Just not quite any of the two. + */ +#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) +#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) +#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) +#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) +#define GICR_VPROPBASER_4_1_Z (1ULL << 52) +#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) +#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + +/* + * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, + * also use the above Valid, PendingLast and Dirty. + */ +#define GICR_VPENDBASER_4_1_DB (1ULL << 62) +#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) +#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) +#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) + +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_MPIDR 0x0018 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 +#define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc + +#define GITS_TRANSLATER 0x10040 + +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS_SHIFT 8 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) +#define GITS_TYPER_PTA (1UL << 19) +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) +#define GITS_TYPER_VMOVP (1ULL << 37) +#define GITS_TYPER_VMAPP (1ULL << 40) +#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) -/* CPU interface registers */ -#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 -#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0) +#define GITS_CBASER_VALID (1ULL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK -#define ICC_PMR_DEF_PRIO 0xf0 +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) + +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1ULL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + +#define GITS_BASER_SHAREABILITY_SHIFT (10) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) +#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) +#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) +#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) +#define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_RESERVED3 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +#define GITS_LVL1_ENTRY_SIZE (8UL) + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPTI 0x0a +#define GITS_CMD_MAPI 0x0b +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) +#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) + +/* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 +#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) +#define ICC_CTLR_EL1_ExtRange (0x1 << 19) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) -#define ICC_IGRPEN1_EL1_ENABLE (1U << 0) +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) -#define GICV3_MAX_CPUS 512 +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) -#endif /* SELFTEST_KVM_GICV3_H */ +#endif diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h new file mode 100644 index 000000000000..3722ed9c8f96 --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTESTS_GIC_V3_ITS_H__ +#define __SELFTESTS_GIC_V3_ITS_H__ + +#include <linux/sizes.h> + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size); + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid); +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid); +void its_send_invall_cmd(void *cmdq_base, u32 collection_id); + +#endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 9e518b562827..9b20a355d81a 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,6 +8,8 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include "ucall_common.h" + #include <linux/stringify.h> #include <linux/types.h> #include <asm/sysreg.h> @@ -58,8 +60,6 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) -#define MPIDR_HWID_BITMASK (0xff00fffffful) - void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -177,11 +177,28 @@ static __always_inline u32 __raw_readl(const volatile void *addr) return val; } +static __always_inline void __raw_writeq(u64 val, volatile void *addr) +{ + asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr)); +} + +static __always_inline u64 __raw_readq(const volatile void *addr) +{ + u64 val; + asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) +#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) +#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) #define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));}) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) + static inline void local_irq_enable(void) { diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h index 4b68f37efd36..4ec801f37f00 100644 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_MMIO diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h index 0ac6f05c63f9..c481d0c00a5d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h @@ -16,8 +16,7 @@ ((uint64_t)(flags) << 12) | \ index) -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa); +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 @@ -33,4 +32,6 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +int vgic_its_setup(struct kvm_vm *vm); + #endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index c9286811a4cb..63c2aaae51f3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1,13 +1,1116 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H -#include "kvm_util_base.h" -#include "ucall_common.h" +#include "test_util.h" + +#include <linux/compiler.h> +#include "linux/hashtable.h" +#include "linux/list.h" +#include <linux/kernel.h> +#include <linux/kvm.h> +#include "linux/rbtree.h" +#include <linux/types.h> + +#include <asm/atomic.h> +#include <asm/kvm.h> + +#include <sys/ioctl.h> + +#include "kvm_util_arch.h" +#include "kvm_util_types.h" +#include "sparsebit.h" + +#define KVM_DEV_PATH "/dev/kvm" +#define KVM_MAX_VCPUS 512 + +#define NSEC_PER_SEC 1000000000L + +struct userspace_mem_region { + struct kvm_userspace_memory_region2 region; + struct sparsebit *unused_phy_pages; + struct sparsebit *protected_phy_pages; + int fd; + off_t offset; + enum vm_mem_backing_src_type backing_src_type; + void *host_mem; + void *host_alias; + void *mmap_start; + void *mmap_alias; + size_t mmap_size; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; +}; + +struct kvm_vcpu { + struct list_head list; + uint32_t id; + int fd; + struct kvm_vm *vm; + struct kvm_run *run; +#ifdef __x86_64__ + struct kvm_cpuid2 *cpuid; +#endif + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; +}; + +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + +struct kvm_vm { + int mode; + unsigned long type; + int kvm_fd; + int fd; + unsigned int pgtable_levels; + unsigned int page_size; + unsigned int page_shift; + unsigned int pa_bits; + unsigned int va_bits; + uint64_t max_gfn; + struct list_head vcpus; + struct userspace_mem_regions regions; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool has_irqchip; + bool pgd_created; + vm_paddr_t ucall_mmio_addr; + vm_paddr_t pgd; + vm_vaddr_t handlers; + uint32_t dirty_ring_size; + uint64_t gpa_tag_mask; + + struct kvm_vm_arch arch; + + /* Cache of information for binary stats interface */ + int stats_fd; + struct kvm_stats_header stats_header; + struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; +}; + +struct vcpu_reg_sublist { + const char *name; + long capability; + int feature; + int feature_type; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; + __u64 *skips_set; + __u64 skips_set_n; +}; + +struct vcpu_reg_list { + char *name; + struct vcpu_reg_sublist sublists[]; +}; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) + +#define kvm_for_each_vcpu(vm, i, vcpu) \ + for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ + if (!((vcpu) = vm->vcpus[i])) \ + continue; \ + else + +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_16K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_16K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_16K, + VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + VM_MODE_P36V48_4K, + VM_MODE_P36V48_16K, + VM_MODE_P36V48_64K, + VM_MODE_P36V47_16K, + NUM_VM_MODES, +}; + +struct vm_shape { + uint32_t type; + uint8_t mode; + uint8_t pad0; + uint16_t pad1; +}; + +kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); + +#define VM_TYPE_DEFAULT 0 + +#define VM_SHAPE(__mode) \ +({ \ + struct vm_shape shape = { \ + .mode = (__mode), \ + .type = VM_TYPE_DEFAULT \ + }; \ + \ + shape; \ +}) + +#if defined(__aarch64__) + +extern enum vm_guest_mode vm_mode_default; + +#define VM_MODE_DEFAULT vm_mode_default +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__x86_64__) + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + +#elif defined(__riscv) + +#if __riscv_xlen == 32 +#error "RISC-V 32-bit kvm selftests not supported" +#endif + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#endif + +#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) + +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + +int open_path_or_exit(const char *path, int flags); +int open_kvm_dev_path_or_exit(void); + +bool get_kvm_param_bool(const char *param); +bool get_kvm_intel_param_bool(const char *param); +bool get_kvm_amd_param_bool(const char *param); + +int get_kvm_param_integer(const char *param); +int get_kvm_intel_param_integer(const char *param); +int get_kvm_amd_param_integer(const char *param); + +unsigned int kvm_check_cap(long cap); + +static inline bool kvm_has_cap(long cap) +{ + return kvm_check_cap(cap); +} + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* + * Use the "inner", double-underscore macro when reporting errors from within + * other macros so that the name of ioctl() and not its literal numeric value + * is printed on error. The "outer" macro is strongly preferred when reporting + * errors "directly", i.e. without an additional layer of macros, as it reduces + * the probability of passing in the wrong string. + */ +#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) +#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) + +#define kvm_do_ioctl(fd, cmd, arg) \ +({ \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ + ioctl(fd, cmd, arg); \ +}) + +#define __kvm_ioctl(kvm_fd, cmd, arg) \ + kvm_do_ioctl(kvm_fd, cmd, arg) + +#define kvm_ioctl(kvm_fd, cmd, arg) \ +({ \ + int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ +}) + +static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } + +#define __vm_ioctl(vm, cmd, arg) \ +({ \ + static_assert_is_vm(vm); \ + kvm_do_ioctl((vm)->fd, cmd, arg); \ +}) + +/* + * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if + * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, + * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before + * selftests existed and (b) should never outright fail, i.e. is supposed to + * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the + * VM and its vCPUs, including KVM_CHECK_EXTENSION. + */ +#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ +do { \ + int __errno = errno; \ + \ + static_assert_is_vm(vm); \ + \ + if (cond) \ + break; \ + \ + if (errno == EIO && \ + __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ + TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ + TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ + } \ + errno = __errno; \ + TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ +} while (0) + +#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ + __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) + +#define vm_ioctl(vm, cmd, arg) \ +({ \ + int ret = __vm_ioctl(vm, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ +}) + +static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } + +#define __vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + static_assert_is_vcpu(vcpu); \ + kvm_do_ioctl((vcpu)->fd, cmd, arg); \ +}) + +#define vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + int ret = __vcpu_ioctl(vcpu, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ +}) + +/* + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +static inline int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); + + TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); + return ret; +} + +static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} +static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, + uint64_t size, uint64_t attributes) +{ + struct kvm_memory_attributes attr = { + .attributes = attributes, + .address = gpa, + .size = size, + .flags = 0, + }; + + /* + * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows + * need significant enhancements to support multiple attributes. + */ + TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, + "Update me to support multiple attributes!"); + + vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); +} + + +static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, 0); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, + bool punch_hole); + +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, true); +} + +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, false); +} + +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); + +void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp); +void kvm_vm_release(struct kvm_vm *vmp); +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); +int kvm_memfd_alloc(size_t size, bool hugepages); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) +{ + struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; + + vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); +} + +static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, + .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; + + vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); +} + +static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); +} + +static inline int vm_get_stats_fd(struct kvm_vm *vm) +{ + int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); + return fd; +} + +static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) +{ + ssize_t ret; + + ret = pread(stats_fd, header, sizeof(*header), 0); + TEST_ASSERT(ret == sizeof(*header), + "Failed to read '%lu' header bytes, ret = '%ld'", + sizeof(*header), ret); +} + +struct kvm_stats_desc *read_stats_descriptors(int stats_fd, + struct kvm_stats_header *header); + +static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) +{ + /* + * The base size of the descriptor is defined by KVM's ABI, but the + * size of the name field is variable, as far as KVM's ABI is + * concerned. For a given instance of KVM, the name field is the same + * size for all stats and is provided in the overall stats header. + */ + return sizeof(struct kvm_stats_desc) + header->name_size; +} + +static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, + int index, + struct kvm_stats_header *header) +{ + /* + * Note, size_desc includes the size of the name field, which is + * variable. i.e. this is NOT equivalent to &stats_desc[i]. + */ + return (void *)stats + index * get_stats_descriptor_size(header); +} + +void read_stat_data(int stats_fd, struct kvm_stats_header *header, + struct kvm_stats_desc *desc, uint64_t *data, + size_t max_elements); + +void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, + size_t max_elements); + +static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) +{ + uint64_t data; + + __vm_get_stat(vm, stat_name, &data, 1); + return data; +} + +void vm_create_irqchip(struct kvm_vm *vm); + +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + +void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); + +#ifndef vm_arch_has_protected_memory +static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) +{ + return false; +} +#endif + +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + unsigned int npages); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); + +#ifndef vcpu_arch_put_guest +#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0) +#endif + +static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) +{ + return gpa & ~vm->gpa_tag_mask; +} + +void vcpu_run(struct kvm_vcpu *vcpu); +int _vcpu_run(struct kvm_vcpu *vcpu); + +static inline int __vcpu_run(struct kvm_vcpu *vcpu) +{ + return __vcpu_ioctl(vcpu, KVM_RUN, NULL); +} + +void vcpu_run_complete_io(struct kvm_vcpu *vcpu); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); + +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, + uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *debug) +{ + vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); +} + +static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); +} +static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); +} + +static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_GET_REGS, regs); +} + +static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_SET_REGS, regs); +} +static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); + +} +static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); +} +static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); +} + +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} +static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} + +#ifdef __KVM_HAVE_VCPU_EVENTS +static inline void vcpu_events_get(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); +} +static inline void vcpu_events_set(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); +} +#endif +#ifdef __x86_64__ +static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); +} +static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} + +static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} +#endif +static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) +{ + int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); + return fd; +} + +int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); + +static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = __kvm_has_device_attr(dev_fd, group, attr); + + TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); +} + +int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_get(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_get(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); +} + +int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_set(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_set(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); +} + +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + return __kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_create_device(struct kvm_vm *vm, uint64_t type); + +static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +{ + int fd = __kvm_create_device(vm, type); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); + return fd; +} + +void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num input parameters for the function at @vcpu's entry point, + * per the C calling convention of the architecture, to the values given as + * variable args. Each of the variable args is expected to be of type uint64_t. + * The maximum @num can be is specific to the architecture. + */ +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); + +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); + +#define KVM_MAX_IRQ_ROUTES 4096 + +struct kvm_irq_routing *kvm_gsi_routing_create(void); +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin); +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); + +const char *exit_reason_str(unsigned int exit_reason); + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot, + bool protected); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); + +static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot) +{ + /* + * By default, allocate memory as protected for VMs that support + * protected memory, as the majority of memory for such VMs is + * protected, i.e. using shared memory is effectively opt-in. + */ + return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, + vm_arch_has_protected_memory(vm)); +} + +/* + * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also + * loads the test binary into guest memory and creates an IRQ chip (x86 only). + * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to + * calculate the amount of memory needed for per-vCPU data, e.g. stacks. + */ +struct kvm_vm *____vm_create(struct vm_shape shape); +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, + uint64_t nr_extra_pages); + +static inline struct kvm_vm *vm_create_barebones(void) +{ + return ____vm_create(VM_SHAPE_DEFAULT); +} + +static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = type, + }; + + return ____vm_create(shape); +} + +static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +{ + return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); +} + +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, + uint64_t extra_mem_pages, + void *guest_code, struct kvm_vcpu *vcpus[]); + +static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, + void *guest_code, + struct kvm_vcpu *vcpus[]) +{ + return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, + guest_code, vcpus); +} + + +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); + +/* + * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages + * additional pages of guest memory. Returns the VM and vCPU (via out param). + */ +static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, + extra_mem_pages, guest_code); +} + +static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_with_one_vcpu(vcpu, 0, guest_code); +} + +static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); +} + +struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); + +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_print_vcpu_pinning_help(void); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; +} + +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent); + +static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent) +{ + vcpu_arch_dump(stream, vcpu, indent); +} + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - The id of the VCPU to add to the VM. + */ +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); + +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) +{ + struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); + + vcpu_arch_set_entry_point(vcpu, guest_code); + + return vcpu; +} + +/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); + +static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, + uint32_t vcpu_id) +{ + return vm_arch_vcpu_recreate(vm, vcpu_id); +} + +void vcpu_arch_free(struct kvm_vcpu *vcpu); + +void virt_arch_pgd_alloc(struct kvm_vm *vm); + +static inline void virt_pgd_alloc(struct kvm_vm *vm) +{ + virt_arch_pgd_alloc(vm); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + virt_arch_pg_map(vm, vaddr, paddr); +} + + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_arch_gva2gpa(vm, gva); +} + +/* + * Virtual Translation Tables Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps to the FILE stream given by @stream, the contents of all the + * virtual translation tables for the VM given by @vm. + */ +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + virt_arch_dump(stream, vm, indent); +} + + +static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) +{ + return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); +} + +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm); + +bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); + +uint32_t guest_get_vcpuid(void); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h deleted file mode 100644 index 3e0db283a46a..000000000000 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ /dev/null @@ -1,1135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/kvm_util_base.h - * - * Copyright (C) 2018, Google LLC. - */ -#ifndef SELFTEST_KVM_UTIL_BASE_H -#define SELFTEST_KVM_UTIL_BASE_H - -#include "test_util.h" - -#include <linux/compiler.h> -#include "linux/hashtable.h" -#include "linux/list.h" -#include <linux/kernel.h> -#include <linux/kvm.h> -#include "linux/rbtree.h" -#include <linux/types.h> - -#include <asm/atomic.h> -#include <asm/kvm.h> - -#include <sys/ioctl.h> - -#include "kvm_util_arch.h" -#include "sparsebit.h" - -/* - * Provide a version of static_assert() that is guaranteed to have an optional - * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) - * #undefs and #defines static_assert() as a direct alias to _Static_assert(), - * i.e. effectively makes the message mandatory. Many KVM selftests #define - * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As - * a result, static_assert() behavior is non-deterministic and may or may not - * require a message depending on #include order. - */ -#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) -#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) - -#define KVM_DEV_PATH "/dev/kvm" -#define KVM_MAX_VCPUS 512 - -#define NSEC_PER_SEC 1000000000L - -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ - -struct userspace_mem_region { - struct kvm_userspace_memory_region2 region; - struct sparsebit *unused_phy_pages; - struct sparsebit *protected_phy_pages; - int fd; - off_t offset; - enum vm_mem_backing_src_type backing_src_type; - void *host_mem; - void *host_alias; - void *mmap_start; - void *mmap_alias; - size_t mmap_size; - struct rb_node gpa_node; - struct rb_node hva_node; - struct hlist_node slot_node; -}; - -struct kvm_vcpu { - struct list_head list; - uint32_t id; - int fd; - struct kvm_vm *vm; - struct kvm_run *run; -#ifdef __x86_64__ - struct kvm_cpuid2 *cpuid; -#endif - struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; -}; - -struct userspace_mem_regions { - struct rb_root gpa_tree; - struct rb_root hva_tree; - DECLARE_HASHTABLE(slot_hash, 9); -}; - -enum kvm_mem_region_type { - MEM_REGION_CODE, - MEM_REGION_DATA, - MEM_REGION_PT, - MEM_REGION_TEST_DATA, - NR_MEM_REGIONS, -}; - -struct kvm_vm { - int mode; - unsigned long type; - uint8_t subtype; - int kvm_fd; - int fd; - unsigned int pgtable_levels; - unsigned int page_size; - unsigned int page_shift; - unsigned int pa_bits; - unsigned int va_bits; - uint64_t max_gfn; - struct list_head vcpus; - struct userspace_mem_regions regions; - struct sparsebit *vpages_valid; - struct sparsebit *vpages_mapped; - bool has_irqchip; - bool pgd_created; - vm_paddr_t ucall_mmio_addr; - vm_paddr_t pgd; - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; - vm_vaddr_t handlers; - uint32_t dirty_ring_size; - uint64_t gpa_tag_mask; - - struct kvm_vm_arch arch; - - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; - - /* - * KVM region slots. These are the default memslots used by page - * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] - * memslot. - */ - uint32_t memslots[NR_MEM_REGIONS]; -}; - -struct vcpu_reg_sublist { - const char *name; - long capability; - int feature; - int feature_type; - bool finalize; - __u64 *regs; - __u64 regs_n; - __u64 *rejects_set; - __u64 rejects_set_n; - __u64 *skips_set; - __u64 skips_set_n; -}; - -struct vcpu_reg_list { - char *name; - struct vcpu_reg_sublist sublists[]; -}; - -#define for_each_sublist(c, s) \ - for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) - -#define kvm_for_each_vcpu(vm, i, vcpu) \ - for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ - if (!((vcpu) = vm->vcpus[i])) \ - continue; \ - else - -struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); - -static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, - enum kvm_mem_region_type type) -{ - assert(type < NR_MEM_REGIONS); - return memslot2region(vm, vm->memslots[type]); -} - -/* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - -#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 - -enum vm_guest_mode { - VM_MODE_P52V48_4K, - VM_MODE_P52V48_16K, - VM_MODE_P52V48_64K, - VM_MODE_P48V48_4K, - VM_MODE_P48V48_16K, - VM_MODE_P48V48_64K, - VM_MODE_P40V48_4K, - VM_MODE_P40V48_16K, - VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ - VM_MODE_P47V64_4K, - VM_MODE_P44V64_4K, - VM_MODE_P36V48_4K, - VM_MODE_P36V48_16K, - VM_MODE_P36V48_64K, - VM_MODE_P36V47_16K, - NUM_VM_MODES, -}; - -struct vm_shape { - uint32_t type; - uint8_t mode; - uint8_t subtype; - uint16_t padding; -}; - -kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); - -#define VM_TYPE_DEFAULT 0 - -#define VM_SHAPE(__mode) \ -({ \ - struct vm_shape shape = { \ - .mode = (__mode), \ - .type = VM_TYPE_DEFAULT \ - }; \ - \ - shape; \ -}) - -#if defined(__aarch64__) - -extern enum vm_guest_mode vm_mode_default; - -#define VM_MODE_DEFAULT vm_mode_default -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__x86_64__) - -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__s390x__) - -#define VM_MODE_DEFAULT VM_MODE_P44V64_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 16) - -#elif defined(__riscv) - -#if __riscv_xlen == 32 -#error "RISC-V 32-bit kvm selftests not supported" -#endif - -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#endif - -#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) - -#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) -#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) - -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; -extern const struct vm_guest_mode_params vm_guest_mode_params[]; - -int open_path_or_exit(const char *path, int flags); -int open_kvm_dev_path_or_exit(void); - -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); - -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); - -unsigned int kvm_check_cap(long cap); - -static inline bool kvm_has_cap(long cap) -{ - return kvm_check_cap(cap); -} - -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - -/* - * Use the "inner", double-underscore macro when reporting errors from within - * other macros so that the name of ioctl() and not its literal numeric value - * is printed on error. The "outer" macro is strongly preferred when reporting - * errors "directly", i.e. without an additional layer of macros, as it reduces - * the probability of passing in the wrong string. - */ -#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) -#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) - -#define kvm_do_ioctl(fd, cmd, arg) \ -({ \ - kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ - ioctl(fd, cmd, arg); \ -}) - -#define __kvm_ioctl(kvm_fd, cmd, arg) \ - kvm_do_ioctl(kvm_fd, cmd, arg) - -#define kvm_ioctl(kvm_fd, cmd, arg) \ -({ \ - int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ - \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ -}) - -static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } - -#define __vm_ioctl(vm, cmd, arg) \ -({ \ - static_assert_is_vm(vm); \ - kvm_do_ioctl((vm)->fd, cmd, arg); \ -}) - -/* - * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if - * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, - * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before - * selftests existed and (b) should never outright fail, i.e. is supposed to - * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the - * VM and its vCPUs, including KVM_CHECK_EXTENSION. - */ -#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ -do { \ - int __errno = errno; \ - \ - static_assert_is_vm(vm); \ - \ - if (cond) \ - break; \ - \ - if (errno == EIO && \ - __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ - TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ - TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ - } \ - errno = __errno; \ - TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ -} while (0) - -#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ - __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) - -#define vm_ioctl(vm, cmd, arg) \ -({ \ - int ret = __vm_ioctl(vm, cmd, arg); \ - \ - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ -}) - -static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } - -#define __vcpu_ioctl(vcpu, cmd, arg) \ -({ \ - static_assert_is_vcpu(vcpu); \ - kvm_do_ioctl((vcpu)->fd, cmd, arg); \ -}) - -#define vcpu_ioctl(vcpu, cmd, arg) \ -({ \ - int ret = __vcpu_ioctl(vcpu, cmd, arg); \ - \ - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ -}) - -/* - * Looks up and returns the value corresponding to the capability - * (KVM_CAP_*) given by cap. - */ -static inline int vm_check_cap(struct kvm_vm *vm, long cap) -{ - int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); - - TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); - return ret; -} - -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} - -static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, - uint64_t size, uint64_t attributes) -{ - struct kvm_memory_attributes attr = { - .attributes = attributes, - .address = gpa, - .size = size, - .flags = 0, - }; - - /* - * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows - * need significant enhancements to support multiple attributes. - */ - TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, - "Update me to support multiple attributes!"); - - vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); -} - - -static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); -} - -static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_set_memory_attributes(vm, gpa, size, 0); -} - -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, - bool punch_hole); - -static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_guest_mem_fallocate(vm, gpa, size, true); -} - -static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_guest_mem_fallocate(vm, gpa, size, false); -} - -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); - -void kvm_vm_free(struct kvm_vm *vmp); -void kvm_vm_restart(struct kvm_vm *vmp); -void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); -int kvm_memfd_alloc(size_t size, bool hugepages); - -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) -{ - struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; - - vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); -} - -static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) -{ - struct kvm_clear_dirty_log args = { - .dirty_bitmap = log, - .slot = slot, - .first_page = first_page, - .num_pages = num_pages - }; - - vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); -} - -static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) -{ - return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); -} - -static inline int vm_get_stats_fd(struct kvm_vm *vm) -{ - int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); - return fd; -} - -static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) -{ - ssize_t ret; - - ret = pread(stats_fd, header, sizeof(*header), 0); - TEST_ASSERT(ret == sizeof(*header), - "Failed to read '%lu' header bytes, ret = '%ld'", - sizeof(*header), ret); -} - -struct kvm_stats_desc *read_stats_descriptors(int stats_fd, - struct kvm_stats_header *header); - -static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) -{ - /* - * The base size of the descriptor is defined by KVM's ABI, but the - * size of the name field is variable, as far as KVM's ABI is - * concerned. For a given instance of KVM, the name field is the same - * size for all stats and is provided in the overall stats header. - */ - return sizeof(struct kvm_stats_desc) + header->name_size; -} - -static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, - int index, - struct kvm_stats_header *header) -{ - /* - * Note, size_desc includes the size of the name field, which is - * variable. i.e. this is NOT equivalent to &stats_desc[i]. - */ - return (void *)stats + index * get_stats_descriptor_size(header); -} - -void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, - size_t max_elements); - -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); - -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; - - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} - -void vm_create_irqchip(struct kvm_vm *vm); - -static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) -{ - struct kvm_create_guest_memfd guest_memfd = { - .size = size, - .flags = flags, - }; - - return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); -} - -static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) -{ - int fd = __vm_create_guest_memfd(vm, size, flags); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); - return fd; -} - -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); - -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); -void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); - -#ifndef vm_arch_has_protected_memory -static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) -{ - return false; -} -#endif - -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); - -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); - - -static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) -{ - return gpa & ~vm->gpa_tag_mask; -} - -void vcpu_run(struct kvm_vcpu *vcpu); -int _vcpu_run(struct kvm_vcpu *vcpu); - -static inline int __vcpu_run(struct kvm_vcpu *vcpu) -{ - return __vcpu_ioctl(vcpu, KVM_RUN, NULL); -} - -void vcpu_run_complete_io(struct kvm_vcpu *vcpu); -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); - -static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, - uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); -} - -static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *debug) -{ - vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); -} - -static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); -} -static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); -} - -static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_GET_REGS, regs); -} - -static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_SET_REGS, regs); -} -static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); - -} -static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); -} -static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); -} - -static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} - -#ifdef __KVM_HAVE_VCPU_EVENTS -static inline void vcpu_events_get(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); -} -static inline void vcpu_events_set(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); -} -#endif -#ifdef __x86_64__ -static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); -} -static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} - -static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} -#endif -static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) -{ - int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); - return fd; -} - -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); - -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) -{ - int ret = __kvm_has_device_attr(dev_fd, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); -} - -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_get(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_get(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); -} - -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_set(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_set(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); -} - -static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - return __kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); -int __kvm_create_device(struct kvm_vm *vm, uint64_t type); - -static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) -{ - int fd = __kvm_create_device(vm, type); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); - return fd; -} - -void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); - -/* - * VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first @num input parameters for the function at @vcpu's entry point, - * per the C calling convention of the architecture, to the values given as - * variable args. Each of the variable args is expected to be of type uint64_t. - * The maximum @num can be is specific to the architecture. - */ -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); - -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); - -#define KVM_MAX_IRQ_ROUTES 4096 - -struct kvm_irq_routing *kvm_gsi_routing_create(void); -void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin); -int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); -void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); - -const char *exit_reason_str(unsigned int exit_reason); - -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); - -static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot) -{ - /* - * By default, allocate memory as protected for VMs that support - * protected memory, as the majority of memory for such VMs is - * protected, i.e. using shared memory is effectively opt-in. - */ - return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, - vm_arch_has_protected_memory(vm)); -} - -/* - * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also - * loads the test binary into guest memory and creates an IRQ chip (x86 only). - * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to - * calculate the amount of memory needed for per-vCPU data, e.g. stacks. - */ -struct kvm_vm *____vm_create(struct vm_shape shape); -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages); - -static inline struct kvm_vm *vm_create_barebones(void) -{ - return ____vm_create(VM_SHAPE_DEFAULT); -} - -#ifdef __x86_64__ -static inline struct kvm_vm *vm_create_barebones_protected_vm(void) -{ - const struct vm_shape shape = { - .mode = VM_MODE_DEFAULT, - .type = KVM_X86_SW_PROTECTED_VM, - }; - - return ____vm_create(shape); -} -#endif - -static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) -{ - return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); -} - -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, - void *guest_code, struct kvm_vcpu *vcpus[]); - -static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, - void *guest_code, - struct kvm_vcpu *vcpus[]) -{ - return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, - guest_code, vcpus); -} - - -struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, - struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code); - -/* - * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages - * additional pages of guest memory. Returns the VM and vCPU (via out param). - */ -static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code) -{ - return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, - extra_mem_pages, guest_code); -} - -static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - void *guest_code) -{ - return __vm_create_with_one_vcpu(vcpu, 0, guest_code); -} - -static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, - struct kvm_vcpu **vcpu, - void *guest_code) -{ - return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); -} - -struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); - -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); -void kvm_print_vcpu_pinning_help(void); -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], - int nr_vcpus); - -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); -unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); -unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); -static inline unsigned int -vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) -{ - unsigned int n; - n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); -#ifdef __s390x__ - /* s390 requires 1M aligned guest sizes */ - n = (n + 255) & ~255; -#endif - return n; -} - -#define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(_p, &(g), sizeof(g)); \ -}) - -#define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(&(g), _p, sizeof(g)); \ -}) - -/* - * Write a global value, but only in the VM's (guest's) domain. Primarily used - * for "globals" that hold per-VM values (VMs always duplicate code and global - * data into their own region of physical memory), but can be used anytime it's - * undesirable to change the host's copy of the global. - */ -#define write_guest_global(vm, g, val) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - typeof(g) _val = val; \ - \ - memcpy(_p, &(_val), sizeof(g)); \ -}) - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent); - -static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent) -{ - vcpu_arch_dump(stream, vcpu, indent); -} - -/* - * Adds a vCPU with reasonable defaults (e.g. a stack) - * - * Input Args: - * vm - Virtual Machine - * vcpu_id - The id of the VCPU to add to the VM. - */ -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); - -static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) -{ - struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); - - vcpu_arch_set_entry_point(vcpu, guest_code); - - return vcpu; -} - -/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); - -static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) -{ - return vm_arch_vcpu_recreate(vm, vcpu_id); -} - -void vcpu_arch_free(struct kvm_vcpu *vcpu); - -void virt_arch_pgd_alloc(struct kvm_vm *vm); - -static inline void virt_pgd_alloc(struct kvm_vm *vm) -{ - virt_arch_pgd_alloc(vm); -} - -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. - */ -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); - -static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -{ - virt_arch_pg_map(vm, vaddr, paddr); -} - - -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gva - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Returns the VM physical address of the translated VM virtual - * address given by @gva. - */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); - -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - return addr_arch_gva2gpa(vm, gva); -} - -/* - * Virtual Translation Tables Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps to the FILE stream given by @stream, the contents of all the - * virtual translation tables for the VM given by @vm. - */ -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - virt_arch_dump(stream, vm, indent); -} - - -static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) -{ - return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); -} - -/* - * Arch hook that is invoked via a constructor, i.e. before exeucting main(), - * to allow for arch-specific setup that is common to all tests, e.g. computing - * the default guest "mode". - */ -void kvm_selftest_arch_init(void); - -void kvm_arch_vm_post_create(struct kvm_vm *vm); - -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); - -uint32_t guest_get_vcpuid(void); - -#endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h new file mode 100644 index 000000000000..ec787b97cf18 --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_TYPES_H +#define SELFTEST_KVM_UTIL_TYPES_H + +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE + * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and + * #defines static_assert() as a direct alias to _Static_assert() (see + * usr/include/assert.h). Define a custom macro instead of redefining + * static_assert() to avoid creating non-deterministic behavior that is + * dependent on include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +#endif /* SELFTEST_KVM_UTIL_TYPES_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index ce4e603050ea..9071eb6dea60 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -62,7 +62,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, void memstress_destroy_vm(struct kvm_vm *vm); void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); void memstress_set_random_access(struct kvm_vm *vm, bool random_access); void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index ce473fe251dd..5f389166338c 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext); +static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext)); +} + +static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); +} + struct ex_regs { unsigned long ra; unsigned long sp; @@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle #define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE #define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT -/* SBI return error codes */ -#define SBI_SUCCESS 0 -#define SBI_ERR_FAILURE -1 -#define SBI_ERR_NOT_SUPPORTED -2 -#define SBI_ERR_INVALID_PARAM -3 -#define SBI_ERR_DENIED -4 -#define SBI_ERR_INVALID_ADDRESS -5 -#define SBI_ERR_ALREADY_AVAILABLE -6 -#define SBI_ERR_ALREADY_STARTED -7 -#define SBI_ERR_ALREADY_STOPPED -8 - -#define SBI_EXT_EXPERIMENTAL_START 0x08000000 -#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF - -#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END -#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 -#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 - -enum sbi_ext_id { - SBI_EXT_BASE = 0x10, - SBI_EXT_STA = 0x535441, -}; - -enum sbi_ext_base_fid { - SBI_EXT_BASE_PROBE_EXT = 3, -}; - -struct sbiret { - long error; - long value; -}; - -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); - -bool guest_sbi_probe_extension(int extid, long *out_val); - static inline void local_irq_enable(void) { csr_set(CSR_SSTATUS, SR_SIE); diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h new file mode 100644 index 000000000000..046b432ae896 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/sbi.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RISC-V SBI specific definitions + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef SELFTEST_KVM_SBI_H +#define SELFTEST_KVM_SBI_H + +/* SBI spec version fields */ +#define SBI_SPEC_VERSION_DEFAULT 0x1 +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff + +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 + +#define SBI_EXT_EXPERIMENTAL_START 0x08000000 +#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF + +#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END +#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 +#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 + +enum sbi_ext_id { + SBI_EXT_BASE = 0x10, + SBI_EXT_STA = 0x535441, + SBI_EXT_PMU = 0x504D55, +}; + +enum sbi_ext_base_fid { + SBI_EXT_BASE_GET_SPEC_VERSION = 0, + SBI_EXT_BASE_GET_IMP_ID, + SBI_EXT_BASE_GET_IMP_VERSION, + SBI_EXT_BASE_PROBE_EXT = 3, +}; +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, +}; + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + +struct sbiret { + long error; + long value; +}; + +/** General pmu event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, +}; + +/* SBI PMU counter types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +bool guest_sbi_probe_extension(int extid, long *out_val); + +/* Make SBI version */ +static inline unsigned long sbi_mk_version(unsigned long major, + unsigned long minor) +{ + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); +} + +unsigned long get_host_sbi_spec_version(void); + +#endif /* SELFTEST_KVM_SBI_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h index be46eb32ec27..a695ae36f3e0 100644 --- a/tools/testing/selftests/kvm/include/riscv/ucall.h +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -3,6 +3,7 @@ #define SELFTEST_KVM_UCALL_H #include "processor.h" +#include "sbi.h" #define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h index b231bf2e49d6..8035a872a351 100644 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ b/tools/testing/selftests/kvm/include/s390x/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 8a6e30612c86..3e473058849f 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -91,9 +91,28 @@ struct guest_random_state { uint32_t seed; }; +extern uint32_t guest_random_seed; +extern struct guest_random_state guest_rng; + struct guest_random_state new_guest_random_state(uint32_t seed); uint32_t guest_random_u32(struct guest_random_state *state); +static inline bool __guest_random_bool(struct guest_random_state *state, + uint8_t percent) +{ + return (guest_random_u32(state) % 100) < percent; +} + +static inline bool guest_random_bool(struct guest_random_state *state) +{ + return __guest_random_bool(state, 50); +} + +static inline uint64_t guest_random_u64(struct guest_random_state *state) +{ + return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state); +} + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h index 877449c34592..60f7f9d435dc 100644 --- a/tools/testing/selftests/kvm/include/userfaultfd_util.h +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -5,9 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <time.h> #include <pthread.h> @@ -17,17 +14,27 @@ typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); -struct uffd_desc { +struct uffd_reader_args { int uffd_mode; int uffd; - int pipefds[2]; useconds_t delay; uffd_handler_t handler; - pthread_t thread; + /* Holds the read end of the pipe for killing the reader. */ + int pipe; +}; + +struct uffd_desc { + int uffd; + uint64_t num_readers; + /* Holds the write ends of the pipes for killing the readers. */ + int *pipefds; + pthread_t *readers; + struct uffd_reader_args *reader_args; }; struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler); void uffd_stop_demand_paging(struct uffd_desc *uffd); diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h index 9f1725192aa2..972bb1c4ab4c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h @@ -5,7 +5,16 @@ #include <stdbool.h> #include <stdint.h> +#include "kvm_util_types.h" +#include "test_util.h" + +extern bool is_forced_emulation_enabled; + struct kvm_vm_arch { + vm_vaddr_t gdt; + vm_vaddr_t tss; + vm_vaddr_t idt; + uint64_t c_bit; uint64_t s_bit; int sev_fd; @@ -20,4 +29,23 @@ static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch) #define vm_arch_has_protected_memory(vm) \ __vm_arch_has_protected_memory(&(vm)->arch) +#define vcpu_arch_put_guest(mem, __val) \ +do { \ + const typeof(mem) val = (__val); \ + \ + if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \ + (mem) = val; \ + } else if (guest_random_bool(&guest_rng)) { \ + __asm__ __volatile__(KVM_FEP "mov %1, %0" \ + : "+m" (mem) \ + : "r" (val) : "memory"); \ + } else { \ + uint64_t __old = READ_ONCE(mem); \ + \ + __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ + : [ptr] "+m" (mem), [old] "+a" (__old) \ + : [new]"r" (val) : "memory", "cc"); \ + } \ +} while (0) + #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 81ce37ec407d..8eb57de0b587 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -18,17 +18,12 @@ #include <linux/kvm_para.h> #include <linux/stringify.h> -#include "../kvm_util.h" +#include "kvm_util.h" +#include "ucall_common.h" extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; -enum vm_guest_x86_subtype { - VM_SUBTYPE_NONE = 0, - VM_SUBTYPE_SEV, - VM_SUBTYPE_SEV_ES, -}; - /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -1139,8 +1134,6 @@ struct idt_entry { uint32_t offset2; uint32_t reserved; }; -void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 8a1bf88474c9..82c11c81a956 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -31,8 +31,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); @@ -67,20 +68,8 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ }) -static inline void sev_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); -} - - -static inline void sev_es_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); -} +void sev_vm_init(struct kvm_vm *vm); +void sev_es_vm_init(struct kvm_vm *vm); static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h index 06b244bd06ee..d3825dcc3cd9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_IO diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index 698c1cfa3111..f02355c3c4c2 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -6,8 +6,6 @@ * * Test the fd-based interface for KVM statistics. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index b9e23265e4b3..c78f34699f73 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -6,8 +6,6 @@ * * Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index e0ba97ac1c56..dd8b12f626d3 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -8,9 +8,6 @@ * page size have been pre-allocated on your system, if you are planning to * use hugepages to back the guest memory for testing. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <time.h> @@ -21,6 +18,7 @@ #include "kvm_util.h" #include "processor.h" #include "guest_modes.h" +#include "ucall_common.h" #define TEST_MEM_SLOT_INDEX 1 diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c index 55668631d546..7abbf8866512 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c @@ -17,13 +17,12 @@ static const struct gic_common_ops *gic_common_ops; static struct spinlock gic_lock; -static void gic_cpu_init(unsigned int cpu, void *redist_base) +static void gic_cpu_init(unsigned int cpu) { - gic_common_ops->gic_cpu_init(cpu, redist_base); + gic_common_ops->gic_cpu_init(cpu); } -static void -gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) +static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) { const struct gic_common_ops *gic_ops = NULL; @@ -40,7 +39,7 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) GUEST_ASSERT(gic_ops); - gic_ops->gic_init(nr_cpus, dist_base); + gic_ops->gic_init(nr_cpus); gic_common_ops = gic_ops; /* Make sure that the initialized data is visible to all the vCPUs */ @@ -49,18 +48,15 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) spin_unlock(&gic_lock); } -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base) +void gic_init(enum gic_type type, unsigned int nr_cpus) { uint32_t cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); - GUEST_ASSERT(dist_base); - GUEST_ASSERT(redist_base); GUEST_ASSERT(nr_cpus); - gic_dist_init(type, nr_cpus, dist_base); - gic_cpu_init(cpu, redist_base); + gic_dist_init(type, nr_cpus); + gic_cpu_init(cpu); } void gic_irq_enable(unsigned int intid) diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h index 75d07313c893..d24e9ecc96c6 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h @@ -8,8 +8,8 @@ #define SELFTEST_KVM_GIC_PRIVATE_H struct gic_common_ops { - void (*gic_init)(unsigned int nr_cpus, void *dist_base); - void (*gic_cpu_init)(unsigned int cpu, void *redist_base); + void (*gic_init)(unsigned int nr_cpus); + void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); uint64_t (*gic_read_iar)(void); diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 263bf3ed8fd5..66d05506f78b 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -9,12 +9,21 @@ #include "processor.h" #include "delay.h" +#include "gic.h" #include "gic_v3.h" #include "gic_private.h" +#define GICV3_MAX_CPUS 512 + +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + +#define ICC_PMR_DEF_PRIO 0xf0 + struct gicv3_data { - void *dist_base; - void *redist_base[GICV3_MAX_CPUS]; unsigned int nr_cpus; unsigned int nr_spis; }; @@ -35,17 +44,23 @@ static void gicv3_gicd_wait_for_rwp(void) { unsigned int count = 100000; /* 1s */ - while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } } -static void gicv3_gicr_wait_for_rwp(void *redist_base) +static inline volatile void *gicr_base_cpu(uint32_t cpu) +{ + /* Align all the redistributors sequentially */ + return GICR_BASE_GVA + cpu * SZ_64K * 2; +} + +static void gicv3_gicr_wait_for_rwp(uint32_t cpu) { unsigned int count = 100000; /* 1s */ - while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) { + while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } @@ -56,7 +71,7 @@ static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); else - gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]); + gicv3_gicr_wait_for_rwp(cpu_or_dist); } static enum gicv3_intid_range get_intid_range(unsigned int intid) @@ -116,15 +131,15 @@ static void gicv3_set_eoi_split(bool split) uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } @@ -263,7 +278,7 @@ static bool gicv3_irq_get_pending(uint32_t intid) return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } -static void gicv3_enable_redist(void *redist_base) +static void gicv3_enable_redist(volatile void *redist_base) { uint32_t val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ @@ -278,21 +293,15 @@ static void gicv3_enable_redist(void *redist_base) } } -static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu) +static void gicv3_cpu_init(unsigned int cpu) { - /* Align all the redistributors sequentially */ - return redist_base + cpu * SZ_64K * 2; -} - -static void gicv3_cpu_init(unsigned int cpu, void *redist_base) -{ - void *sgi_base; + volatile void *sgi_base; unsigned int i; - void *redist_base_cpu; + volatile void *redist_base_cpu; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - redist_base_cpu = gicr_base_cpu(redist_base, cpu); + redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); gicv3_enable_redist(redist_base_cpu); @@ -310,7 +319,7 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) writel(GICD_INT_DEF_PRI_X4, sgi_base + GICR_IPRIORITYR0 + i); - gicv3_gicr_wait_for_rwp(redist_base_cpu); + gicv3_gicr_wait_for_rwp(cpu); /* Enable the GIC system register (ICC_*) access */ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE, @@ -320,18 +329,15 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); /* Enable non-secure Group-1 interrupts */ - write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1); - - gicv3_data.redist_base[cpu] = redist_base_cpu; + write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } static void gicv3_dist_init(void) { - void *dist_base = gicv3_data.dist_base; unsigned int i; /* Disable the distributor until we set things up */ - writel(0, dist_base + GICD_CTLR); + writel(0, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); /* @@ -339,33 +345,32 @@ static void gicv3_dist_init(void) * Also, deactivate and disable them. */ for (i = 32; i < gicv3_data.nr_spis; i += 32) { - writel(~0, dist_base + GICD_IGROUPR + i / 8); - writel(~0, dist_base + GICD_ICACTIVER + i / 8); - writel(~0, dist_base + GICD_ICENABLER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8); } /* Set a default priority for all the SPIs */ for (i = 32; i < gicv3_data.nr_spis; i += 4) writel(GICD_INT_DEF_PRI_X4, - dist_base + GICD_IPRIORITYR + i); + GICD_BASE_GVA + GICD_IPRIORITYR + i); /* Wait for the settings to sync-in */ gicv3_gicd_wait_for_rwp(); /* Finally, enable the distributor globally with ARE */ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | - GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR); + GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); } -static void gicv3_init(unsigned int nr_cpus, void *dist_base) +static void gicv3_init(unsigned int nr_cpus) { GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS); gicv3_data.nr_cpus = nr_cpus; - gicv3_data.dist_base = dist_base; gicv3_data.nr_spis = GICD_TYPER_SPIS( - readl(gicv3_data.dist_base + GICD_TYPER)); + readl(GICD_BASE_GVA + GICD_TYPER)); if (gicv3_data.nr_spis > 1020) gicv3_data.nr_spis = 1020; @@ -396,3 +401,27 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, }; + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table) +{ + volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); + + u32 ctlr; + u64 val; + + val = (cfg_table | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK)); + writeq_relaxed(val, rdist_base + GICR_PROPBASER); + + val = (pend_table | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + writeq_relaxed(val, rdist_base + GICR_PENDBASER); + + ctlr = readl_relaxed(rdist_base + GICR_CTLR); + ctlr |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(ctlr, rdist_base + GICR_CTLR); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c new file mode 100644 index 000000000000..09f270545646 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c + * over in the kernel tree. + */ + +#include <linux/kvm.h> +#include <linux/sizes.h> +#include <asm/kvm_para.h> +#include <asm/kvm.h> + +#include "kvm_util.h" +#include "vgic.h" +#include "gic.h" +#include "gic_v3.h" +#include "processor.h" + +static u64 its_read_u64(unsigned long offset) +{ + return readq_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u64(unsigned long offset, u64 val) +{ + writeq_relaxed(val, GITS_BASE_GVA + offset); +} + +static u32 its_read_u32(unsigned long offset) +{ + return readl_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u32(unsigned long offset, u32 val) +{ + writel_relaxed(val, GITS_BASE_GVA + offset); +} + +static unsigned long its_find_baser(unsigned int type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + u64 baser; + unsigned long offset = GITS_BASER + (i * sizeof(baser)); + + baser = its_read_u64(offset); + if (GITS_BASER_TYPE(baser) == type) + return offset; + } + + GUEST_FAIL("Couldn't find an ITS BASER of type %u", type); + return -1; +} + +static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +{ + unsigned long offset = its_find_baser(type); + u64 baser; + + baser = ((size / SZ_64K) - 1) | + GITS_BASER_PAGE_SIZE_64K | + GITS_BASER_InnerShareable | + base | + GITS_BASER_RaWaWb | + GITS_BASER_VALID; + + its_write_u64(offset, baser); +} + +static void its_install_cmdq(vm_paddr_t base, size_t size) +{ + u64 cbaser; + + cbaser = ((size / SZ_4K) - 1) | + GITS_CBASER_InnerShareable | + base | + GITS_CBASER_RaWaWb | + GITS_CBASER_VALID; + + its_write_u64(GITS_CBASER, cbaser); +} + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size) +{ + u32 ctlr; + + its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz); + its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz); + its_install_cmdq(cmdq, cmdq_size); + + ctlr = its_read_u32(GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + its_write_u32(GITS_CTLR, ctlr); +} + +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +#define GITS_CMDQ_POLL_ITERATIONS 0 + +static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) +{ + u64 cwriter = its_read_u64(GITS_CWRITER); + struct its_cmd_block *dst = cmdq_base + cwriter; + u64 cbaser = its_read_u64(GITS_CBASER); + size_t cmdq_size; + u64 next; + int i; + + cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K; + + its_fixup_cmd(cmd); + + WRITE_ONCE(*dst, *cmd); + dsb(ishst); + next = (cwriter + sizeof(*cmd)) % cmdq_size; + its_write_u64(GITS_CWRITER, next); + + /* + * Polling isn't necessary considering KVM's ITS emulation at the time + * of writing this, as the CMDQ is processed synchronously after a write + * to CWRITER. + */ + for (i = 0; its_read_u64(GITS_CREADR) != next; i++) { + __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS, + "ITS didn't process command at offset %lu after %d iterations\n", + cwriter, i); + + cpu_relax(); + } +} + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPD); + its_encode_devid(&cmd, device_id); + its_encode_size(&cmd, ilog2(itt_size) - 1); + its_encode_itt(&cmd, itt_base); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPC); + its_encode_collection(&cmd, collection_id); + its_encode_target(&cmd, vcpu_id); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPTI); + its_encode_devid(&cmd, device_id); + its_encode_event_id(&cmd, event_id); + its_encode_phys_id(&cmd, intid); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_invall_cmd(void *cmdq_base, u32 collection_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_INVALL); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index a9eb17295be4..0ac7cc89f38c 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -11,6 +11,8 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" + #include <linux/bitfield.h> #include <linux/sizes.h> diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index 184378d593e9..4427f43f73ea 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -3,8 +3,10 @@ * ARM Generic Interrupt Controller (GIC) v3 host support */ +#include <linux/kernel.h> #include <linux/kvm.h> #include <linux/sizes.h> +#include <asm/cputype.h> #include <asm/kvm_para.h> #include <asm/kvm.h> @@ -19,8 +21,6 @@ * Input args: * vm - KVM VM * nr_vcpus - Number of vCPUs supported by this VM - * gicd_base_gpa - Guest Physical Address of the Distributor region - * gicr_base_gpa - Guest Physical Address of the Redistributor region * * Output args: None * @@ -30,11 +30,10 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa) +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; - uint64_t redist_attr; + uint64_t attr; struct list_head *iter; unsigned int nr_gic_pages, nr_vcpus_created = 0; @@ -60,18 +59,19 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa); + KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); - virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages); + virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages); /* Redistributor setup */ - redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0); + attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0); kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr); + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); - virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages); + virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); @@ -168,3 +168,21 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } + +int vgic_its_setup(struct kvm_vm *vm) +{ + int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); + u64 attr; + + attr = GITS_BASE_GPA; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &attr); + + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA, + vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE)); + + return its_fd; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 2bd25b191d15..b49690658c60 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ - #include "test_util.h" #include <execinfo.h> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b2262b5fad9e..ad00e4761886 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -4,11 +4,10 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #include <assert.h> #include <sched.h> @@ -20,6 +19,9 @@ #define KVM_UTIL_MIN_PFN 2 +uint32_t guest_random_seed; +struct guest_random_state guest_rng; + static int vcpu_mmap_sz(void); int open_path_or_exit(const char *path, int flags) @@ -276,7 +278,6 @@ struct kvm_vm *____vm_create(struct vm_shape shape) vm->mode = shape.mode; vm->type = shape.type; - vm->subtype = shape.subtype; vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; @@ -433,6 +434,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + pr_info("Random seed: 0x%x\n", guest_random_seed); + guest_rng = new_guest_random_state(guest_random_seed); + sync_global_to_guest(vm, guest_rng); + kvm_arch_vm_post_create(vm); return vm; @@ -930,6 +935,10 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } +#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \ + __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ + "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") + int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva, uint32_t guest_memfd, uint64_t guest_memfd_offset) @@ -944,6 +953,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag .guest_memfd_offset = guest_memfd_offset, }; + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); } @@ -970,6 +981,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, size_t mem_size = npages * vm->page_size; size_t alignment; + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); @@ -2306,6 +2319,8 @@ void __attribute((constructor)) kvm_selftest_init(void) /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + guest_random_seed = random(); + kvm_selftest_arch_init(); } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index cf2c73971308..313277486a1d 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,14 +2,13 @@ /* * Copyright (C) 2020, Google LLC. */ -#define _GNU_SOURCE - #include <inttypes.h> #include <linux/bitmap.h> #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "ucall_common.h" struct memstress_args memstress_args; @@ -56,7 +55,7 @@ void memstress_guest_code(uint32_t vcpu_idx) uint64_t page; int i; - rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -76,7 +75,7 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); - if (guest_random_u32(&rand_state) % 100 < args->write_percent) + if (__guest_random_bool(&rand_state, args->write_percent)) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -243,12 +242,6 @@ void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) sync_global_to_guest(vm, memstress_args.write_percent); } -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) -{ - memstress_args.random_seed = random_seed; - sync_global_to_guest(vm, memstress_args.random_seed); -} - void memstress_set_random_access(struct kvm_vm *vm, bool random_access) { memstress_args.random_access = random_access; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index e8211f5d6863..6ae47b3d6b25 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -10,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 @@ -502,3 +503,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val) return true; } + +unsigned long get_host_sbi_spec_version(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0, + 0, 0, 0, 0, 0); + + GUEST_ASSERT(!ret.error); + + return ret.value; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 5a8f8becb129..8ed0b74ae837 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,8 +4,6 @@ * * Copyright (C) 2020, Google LLC. */ - -#define _GNU_SOURCE #include <stdio.h> #include <stdarg.h> #include <assert.h> diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index f5af65a41c29..42151e571953 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "kvm_util.h" #include "linux/types.h" #include "linux/bitmap.h" #include "linux/atomic.h" +#include "kvm_util.h" +#include "ucall_common.h" + + #define GUEST_UCALL_FAILED -1 struct ucall_header { diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index f4eef6eb2dc2..7c9de8414462 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -16,6 +13,7 @@ #include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> +#include <sys/epoll.h> #include <sys/syscall.h> #include "kvm_util.h" @@ -27,76 +25,69 @@ static void *uffd_handler_thread_fn(void *arg) { - struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; - int uffd = uffd_desc->uffd; - int pipefd = uffd_desc->pipefds[0]; - useconds_t delay = uffd_desc->delay; + struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; + int uffd = reader_args->uffd; int64_t pages = 0; struct timespec start; struct timespec ts_diff; + struct epoll_event evt; + int epollfd; + + epollfd = epoll_create(1); + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); + + evt.events = EPOLLIN | EPOLLEXCLUSIVE; + evt.data.u32 = 0; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt), + "Failed to add uffd to epollfd"); + + evt.events = EPOLLIN; + evt.data.u32 = 1; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt), + "Failed to add pipe to epollfd"); clock_gettime(CLOCK_MONOTONIC, &start); while (1) { struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; int r; - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; + r = epoll_wait(epollfd, &evt, 1, -1); + TEST_ASSERT(r == 1, + "Unexpected number of events (%d) from epoll, errno = %d", + r, errno); - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } + if (evt.data.u32 == 1) { + char tmp_chr; - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); + r = read(reader_args->pipe, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread"); + "Error reading pipefd in uffd reader thread"); break; } - if (!(pollfd[0].revents & POLLIN)) - continue; + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); r = read(uffd, &msg, sizeof(msg)); if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; + TEST_ASSERT(errno == EAGAIN, + "Error reading from UFFD: errno = %d", errno); + continue; } - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } + TEST_ASSERT(r == sizeof(msg), + "Read on uffd returned unexpected number of bytes (%d)", r); if (!(msg.event & UFFD_EVENT_PAGEFAULT)) continue; - if (delay) - usleep(delay); - r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); - if (r < 0) - return NULL; + if (reader_args->delay) + usleep(reader_args->delay); + r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); + TEST_ASSERT(r >= 0, + "Reader thread handler fn returned negative value %d", r); pages++; } @@ -110,6 +101,7 @@ static void *uffd_handler_thread_fn(void *arg) struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -118,14 +110,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); - TEST_ASSERT(uffd_desc, "malloc failed"); + TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); + + uffd_desc->pipefds = calloc(sizeof(int), num_readers); + TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes"); + + uffd_desc->readers = calloc(sizeof(pthread_t), num_readers); + TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads"); + + uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers); + TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args"); + + uffd_desc->num_readers = num_readers; /* In order to get minor faults, prefault via the alias. */ if (is_minor) @@ -148,18 +151,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == expected_ioctls, "missing userfaultfd ioctls"); - ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!ret, "Failed to set up pipefd"); - - uffd_desc->uffd_mode = uffd_mode; uffd_desc->uffd = uffd; - uffd_desc->delay = delay; - uffd_desc->handler = handler; - pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, - uffd_desc); + for (i = 0; i < uffd_desc->num_readers; ++i) { + int pipes[2]; - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); + ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p", + i, uffd_desc); + + uffd_desc->pipefds[i] = pipes[1]; + + uffd_desc->reader_args[i].uffd_mode = uffd_mode; + uffd_desc->reader_args[i].uffd = uffd; + uffd_desc->reader_args[i].delay = delay; + uffd_desc->reader_args[i].handler = handler; + uffd_desc->reader_args[i].pipe = pipes[0]; + + pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn, + &uffd_desc->reader_args[i]); + + PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n", + i, hva, hva + len); + } return uffd_desc; } @@ -167,19 +180,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void uffd_stop_demand_paging(struct uffd_desc *uffd) { char c = 0; - int ret; + int i; - ret = write(uffd->pipefds[1], &c, 1); - TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1, + "Unable to write to pipefd %i for uffd_desc %p", i, uffd); - ret = pthread_join(uffd->thread, NULL); - TEST_ASSERT(ret == 0, "Pthread_join failed."); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(!pthread_join(uffd->readers[i], NULL), + "Pthread_join failed on reader %i for uffd_desc %p", i, uffd); close(uffd->uffd); - close(uffd->pipefds[1]); - close(uffd->pipefds[0]); + for (i = 0; i < uffd->num_readers; ++i) { + close(uffd->pipefds[i]); + close(uffd->reader_args[i].pipe); + } + free(uffd->pipefds); + free(uffd->readers); + free(uffd->reader_args); free(uffd); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 74a4c736c9ae..c664e446136b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -15,14 +15,16 @@ #define NUM_INTERRUPTS 256 #endif -#define DEFAULT_CODE_SELECTOR 0x8 -#define DEFAULT_DATA_SELECTOR 0x10 +#define KERNEL_CS 0x8 +#define KERNEL_DS 0x10 +#define KERNEL_TSS 0x18 #define MAX_NR_CPUID_ENTRIES 100 vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; +bool is_forced_emulation_enabled; static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -417,7 +419,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp) static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) { - void *gdt = addr_gva2hva(vm, vm->gdt); + void *gdt = addr_gva2hva(vm, vm->arch.gdt); struct desc64 *desc = gdt + (segp->selector >> 3) * 8; desc->limit0 = segp->limit & 0xFFFF; @@ -437,27 +439,10 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->base3 = segp->base >> 32; } - -/* - * Set Long Mode Flat Kernel Code Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a code segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_CS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed @@ -466,30 +451,12 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, segp->g = true; segp->l = true; segp->present = 1; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } -/* - * Set Long Mode Flat Kernel Data Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a data segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_DS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed @@ -497,8 +464,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, */ segp->g = true; segp->present = true; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) @@ -516,72 +481,153 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) -{ - if (!vm->gdt) - vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - - dt->base = vm->gdt; - dt->limit = getpagesize(); -} - -static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, - int selector) +static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) { - if (!vm->tss) - vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - memset(segp, 0, sizeof(*segp)); - segp->base = vm->tss; + segp->base = base; segp->limit = 0x67; - segp->selector = selector; + segp->selector = KERNEL_TSS; segp->type = 0xb; segp->present = 1; - kvm_seg_fill_gdt_64bit(vm, segp); } -static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; + TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); - sregs.idt.limit = 0; + sregs.idt.base = vm->arch.idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->arch.gdt; + sregs.gdt.limit = getpagesize() - 1; + + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); - kvm_setup_gdt(vm, &sregs.gdt); + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(&sregs.cs); + kvm_seg_set_kernel_data_64bit(&sregs.ds); + kvm_seg_set_kernel_data_64bit(&sregs.es); + kvm_seg_set_kernel_data_64bit(&sregs.gs); + kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); - switch (vm->mode) { - case VM_MODE_PXXV48_4K: - sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; - sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vcpu, &sregs); +} - kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); - kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); - break; +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +static bool kvm_fixup_exception(struct ex_regs *regs) +{ + if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) + return false; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + if (regs->vector == DE_VECTOR) + return false; + + regs->rip = regs->r11; + regs->r9 = regs->vector; + regs->r10 = regs->error_code; + return true; +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; } - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vcpu, &sregs); + if (kvm_fixup_exception(regs)) + return; + + ucall_assert(UCALL_UNHANDLED, + "Unhandled exception in guest", __FILE__, __LINE__, + "Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); +} + +static void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + struct kvm_segment seg; + int i; + + vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + + kvm_seg_set_kernel_code_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_kernel_data_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_tss_64bit(vm->arch.tss, &seg); + kvm_seg_fill_gdt_64bit(vm, &seg); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + REPORT_GUEST_ASSERT(uc); } void kvm_arch_vm_post_create(struct kvm_vm *vm) { vm_create_irqchip(vm); + vm_init_descriptor_tables(vm); + sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, is_forced_emulation_enabled); + + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + struct kvm_sev_init init = { 0 }; - if (vm->subtype == VM_SUBTYPE_SEV) - sev_vm_init(vm); - else if (vm->subtype == VM_SUBTYPE_SEV_ES) - sev_es_vm_init(vm); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -621,7 +667,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - vcpu_setup(vm, vcpu); + vcpu_init_sregs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -1081,108 +1127,15 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) { + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; + } else { + vm->arch.sev_fd = -1; } } -static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, - int dpl, unsigned short selector) -{ - struct idt_entry *base = - (struct idt_entry *)addr_gva2hva(vm, vm->idt); - struct idt_entry *e = &base[vector]; - - memset(e, 0, sizeof(*e)); - e->offset0 = addr; - e->selector = selector; - e->ist = 0; - e->type = 14; - e->dpl = dpl; - e->p = 1; - e->offset1 = addr >> 16; - e->offset2 = addr >> 32; -} - - -static bool kvm_fixup_exception(struct ex_regs *regs) -{ - if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) - return false; - - if (regs->vector == DE_VECTOR) - return false; - - regs->rip = regs->r11; - regs->r9 = regs->vector; - regs->r10 = regs->error_code; - return true; -} - -void route_exception(struct ex_regs *regs) -{ - typedef void(*handler)(struct ex_regs *); - handler *handlers = (handler *)exception_handlers; - - if (handlers && handlers[regs->vector]) { - handlers[regs->vector](regs); - return; - } - - if (kvm_fixup_exception(regs)) - return; - - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); -} - -void vm_init_descriptor_tables(struct kvm_vm *vm) -{ - extern void *idt_handlers; - int i; - - vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - /* Handlers have the same address in both address spaces.*/ - for (i = 0; i < NUM_INTERRUPTS; i++) - set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, - DEFAULT_CODE_SELECTOR); -} - -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) -{ - struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; - - vcpu_sregs_get(vcpu, &sregs); - sregs.idt.base = vm->idt; - sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; - sregs.gdt.base = vm->gdt; - sregs.gdt.limit = getpagesize() - 1; - kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); - vcpu_sregs_set(vcpu, &sregs); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; -} - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) -{ - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); - - handlers[vector] = (vm_vaddr_t)handler; -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) - REPORT_GUEST_ASSERT(uc); -} - const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index) { @@ -1344,6 +1297,7 @@ void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); + is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); } bool sys_clocksource_is_based_on_tsc(void) diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index e248d3364b9c..e9535ee20b7f 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <stdint.h> #include <stdbool.h> @@ -35,6 +34,32 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio } } +void sev_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_es_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_ES_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) { struct kvm_sev_launch_start launch_start = { @@ -87,28 +112,30 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { - .type = VM_TYPE_DEFAULT, .mode = VM_MODE_DEFAULT, - .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES : - VM_SUBTYPE_SEV, + .type = type, }; struct kvm_vm *vm; struct kvm_vcpu *cpus[1]; - uint8_t measurement[512]; vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); *cpu = cpus[0]; + return vm; +} + +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +{ sev_vm_launch(vm, policy); - /* TODO: Validate the measurement is as expected. */ + if (!measurement) + measurement = alloca(256); + sev_vm_launch_measure(vm, measurement); sev_vm_launch_finish(vm); - - return vm; } diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 1a6da7389bf1..0b9678858b6d 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <stdio.h> #include <stdlib.h> #include <pthread.h> diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 156361966612..05fcf902e067 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2020, Google, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 0f9cabd99fd4..2c792228ac0b 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -7,13 +7,11 @@ * * Copyright (c) 2024, Intel Corporation. */ - -#define _GNU_SOURCE - #include "arch_timer.h" #include "kvm_util.h" #include "processor.h" #include "timer_test.h" +#include "ucall_common.h" static int timer_irq = IRQ_S_TIMER; @@ -85,7 +83,7 @@ struct kvm_vm *test_vm_create(void) int nr_vcpus = test_args.nr_vcpus; vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)), + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC), "SSTC not available, skipping test\n"); vm_init_vector_tables(vm); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c new file mode 100644 index 000000000000..823c132069b4 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V KVM ebreak test. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include "kvm_util.h" + +#define LABEL_ADDRESS(v) ((uint64_t)&(v)) + +extern unsigned char sw_bp_1, sw_bp_2; +static uint64_t sw_bp_addr; + +static void guest_code(void) +{ + asm volatile( + ".option push\n" + ".option norvc\n" + "sw_bp_1: ebreak\n" + "sw_bp_2: ebreak\n" + ".option pop\n" + ); + GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2)); + + GUEST_DONE(); +} + +static void guest_breakpoint_handler(struct ex_regs *regs) +{ + WRITE_ONCE(sw_bp_addr, regs->epc); + regs->epc += 4; +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint64_t pc; + struct kvm_guest_debug debug = { + .control = KVM_GUESTDBG_ENABLE, + }; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_vector_tables(vm); + vcpu_init_vector_tables(vcpu); + vm_install_exception_handler(vm, EXC_BREAKPOINT, + guest_breakpoint_handler); + + /* + * Enable the guest debug. + * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason. + */ + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); + + /* skip sw_bp_1 */ + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4); + + /* + * Disable all debug controls. + * Guest should handle the ebreak without exiting to the VMM. + */ + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index b882b7b9b785..222198dd6d04 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -43,6 +43,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: @@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(V), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); +KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); @@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_d, &config_h, &config_smstateen, + &config_sscofpmf, &config_sstc, &config_svinval, &config_svnapot, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c new file mode 100644 index 000000000000..69bb94e6b227 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -0,0 +1,681 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality. + * + * Copyright (c) 2024, Rivos Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" +#include "sbi.h" +#include "arch_timer.h" + +/* Maximum counters(firmware + hardware) */ +#define RISCV_MAX_PMU_COUNTERS 64 +union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS]; + +/* Snapshot shared memory data */ +#define PMU_SNAPSHOT_GPA_BASE BIT(30) +static void *snapshot_gva; +static vm_paddr_t snapshot_gpa; + +static int vcpu_shared_irq_count; +static int counter_in_use; + +/* Cache the available counters in a bitmask */ +static unsigned long counter_mask_available; + +static bool illegal_handler_invoked; + +#define SBI_PMU_TEST_BASIC BIT(0) +#define SBI_PMU_TEST_EVENTS BIT(1) +#define SBI_PMU_TEST_SNAPSHOT BIT(2) +#define SBI_PMU_TEST_OVERFLOW BIT(3) + +static int disabled_tests; + +unsigned long pmu_csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + switchcase_csr_read_32(CSR_CYCLEH, ret) + default : + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static inline void dummy_func_loop(uint64_t iter) +{ + int i = 0; + + while (i < iter) { + asm volatile("nop"); + i++; + } +} + +static void start_counter(unsigned long counter, unsigned long start_flags, + unsigned long ival) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags, + ival, 0, 0); + __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter); +} + +/* This should be invoked only for reset counter use case */ +static void stop_reset_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, + stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld\n", counter); +} + +static void stop_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, + 0, 0, 0); + __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n", + counter, ret.error); +} + +static void guest_illegal_exception_handler(struct ex_regs *regs) +{ + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, + "Unexpected exception handler %lx\n", regs->cause); + + illegal_handler_invoked = true; + /* skip the trapping instruction */ + regs->epc += 4; +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + unsigned long overflown_mask; + unsigned long counter_val = 0; + + /* Validate that we are in the correct irq handler */ + GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); + + /* Stop all counters first to avoid further interrupts */ + stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF)); + + overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask); + GUEST_ASSERT(overflown_mask & 0x01); + + WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); + + counter_val = READ_ONCE(snapshot_data->ctr_values[0]); + /* Now start the counter to mimick the real driver behavior */ + start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val); +} + +static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, + unsigned long cflags, + unsigned long event) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, event, 0, 0); + __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error); + GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS); + GUEST_ASSERT(BIT(ret.value) & counter_mask_available); + + return ret.value; +} + +static unsigned long get_num_counters(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); + + __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU"); + __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS, + "Invalid number of counters %ld\n", ret.value); + + return ret.value; +} + +static void update_counter_info(int num_counters) +{ + int i = 0; + struct sbiret ret; + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo_arr[i].value = ret.value; + counter_mask_available |= BIT(i); + } + + GUEST_ASSERT(counter_mask_available > 0); +} + +static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0); + GUEST_ASSERT(ret.error == 0); + return ret.value; +} + +static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + unsigned long counter_val = 0; + + __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type); + + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) + counter_val = pmu_csr_read_num(ctrinfo.csr); + else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) + counter_val = read_fw_counter(idx, ctrinfo); + + return counter_val; +} + +static inline void verify_sbi_requirement_assert(void) +{ + long out_val = 0; + bool probe; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + if (get_host_sbi_spec_version() < sbi_mk_version(2, 0)) + __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot"); +} + +static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags) +{ + unsigned long lo = (unsigned long)gpa; +#if __riscv_xlen == 32 + unsigned long hi = (unsigned long)(gpa >> 32); +#else + unsigned long hi = gpa == -1 ? -1 : 0; +#endif + struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + + GUEST_ASSERT(ret.value == 0 && ret.error == 0); +} + +static void test_pmu_event(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2); + stop_counter(counter, 0); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + stop_counter(counter, 0); + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_snapshot(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + /* The counter value is updated w.r.t relative index of cbase */ + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]); + + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_overflow(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_post; + unsigned long counter_init_value = ULONG_MAX - 10000; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_in_use = counter; + + /* The counter value is updated w.r.t relative index of cbase passed to start/stop */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + udelay(msecs_to_usecs(2000)); + /* irq handler should have stopped the counter */ + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + /* The counter value after stopping should be less the init value due to overflow */ + __GUEST_ASSERT(counter_value_post < counter_init_value, + "counter_value_post %lx counter_init_value %lx for counter\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_invalid_event(void) +{ + struct sbiret ret; + unsigned long event = 0x1234; /* A random event */ + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0, + counter_mask_available, 0, event, 0, 0); + GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED); +} + +static void test_pmu_events(void) +{ + int num_counters = 0; + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Sanity testing for any random invalid event */ + test_invalid_event(); + + /* Only these two events are guaranteed to be present */ + test_pmu_event(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_basic_sanity(void) +{ + long out_val = 0; + bool probe; + struct sbiret ret; + int num_counters = 0, i; + union sbi_pmu_ctr_info ctrinfo; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + num_counters = get_num_counters(); + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, + 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo.value = ret.value; + + /** + * Accessibility check of hardware and read capability of firmware counters. + * The spec doesn't mandate any initial value. No need to check any value. + */ + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) { + pmu_csr_read_num(ctrinfo.csr); + GUEST_ASSERT(illegal_handler_invoked); + } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) { + read_fw_counter(i, ctrinfo); + } + } + + GUEST_DONE(); +} + +static void test_pmu_events_snaphost(void) +{ + int num_counters = 0; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + int i; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Validate shared memory access */ + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0); + for (i = 0; i < num_counters; i++) { + if (counter_mask_available & (BIT(i))) + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0); + } + /* Only these two events are guranteed to be present */ + test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_events_overflow(void) +{ + int num_counters = 0; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + csr_set(CSR_IE, BIT(IRQ_PMU_OVF)); + local_irq_enable(); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* + * Qemu supports overflow for cycle/instruction. + * This test may fail on any platform that do not support overflow for these two events. + */ + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1); + + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + case UCALL_SYNC: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +void test_vm_destroy(struct kvm_vm *vm) +{ + memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS); + counter_mask_available = 0; + kvm_vm_free(vm); +} + +static void test_vm_basic_test(void *guest_code) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + vm_init_vector_tables(vm); + /* Illegal instruction handler is required to verify read access without configuration */ + vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler); + + vcpu_init_vector_tables(vcpu); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu = NULL; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + /* PMU Snapshot requires single page only */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0); + /* PMU_SNAPSHOT_GPA_BASE is identity mapped */ + virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1); + + snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE); + snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gpa); +} + +static void test_vm_events_snapshot_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_overflow(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF), + "Sscofpmf is not available, skipping overflow test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + vm_init_vector_tables(vm); + vm_install_interrupt_handler(vm, guest_irq_handler); + + vcpu_init_vector_tables(vcpu); + /* Initialize guest timer frequency. */ + vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + sync_global_to_guest(vm, timer_freq); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-d <test name>]\n", name); + pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-h: print this help screen\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "hd:")) != -1) { + switch (opt) { + case 'd': + if (!strncmp("basic", optarg, 5)) + disabled_tests |= SBI_PMU_TEST_BASIC; + else if (!strncmp("events", optarg, 6)) + disabled_tests |= SBI_PMU_TEST_EVENTS; + else if (!strncmp("snapshot", optarg, 8)) + disabled_tests |= SBI_PMU_TEST_SNAPSHOT; + else if (!strncmp("overflow", optarg, 8)) + disabled_tests |= SBI_PMU_TEST_OVERFLOW; + else + goto done; + break; + case 'h': + default: + goto done; + } + } + + return true; +done: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (!(disabled_tests & SBI_PMU_TEST_BASIC)) { + test_vm_basic_test(test_pmu_basic_sanity); + pr_info("SBI PMU basic test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) { + test_vm_events_test(test_pmu_events); + pr_info("SBI PMU event verification test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + test_vm_events_snapshot_test(test_pmu_events_snaphost); + pr_info("SBI PMU event verification with snapshot test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + test_vm_events_overflow(test_pmu_events_overflow); + pr_info("SBI PMU event verification with overflow test : PASS\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 28f97fb52044..e5898678bfab 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -1,5 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ + +/* + * Include rseq.c without _GNU_SOURCE defined, before including any headers, so + * that rseq.c is compiled with its configuration, not KVM selftests' config. + */ +#undef _GNU_SOURCE +#include "../rseq/rseq.c" +#define _GNU_SOURCE + #include <errno.h> #include <fcntl.h> #include <pthread.h> @@ -19,8 +27,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" - -#include "../rseq/rseq.c" +#include "ucall_common.h" /* * Any bug related to task migration is likely to be timing-dependent; perform @@ -186,12 +193,35 @@ static void calc_min_max_cpu(void) "Only one usable CPU, task migration not possible"); } +static void help(const char *name) +{ + puts(""); + printf("usage: %s [-h] [-u]\n", name); + printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + puts(""); + exit(0); +} + int main(int argc, char *argv[]) { + bool skip_sanity_check = false; int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; + int opt; + + while ((opt = getopt(argc, argv, "hu")) != -1) { + switch (opt) { + case 'u': + skip_sanity_check = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, @@ -254,9 +284,17 @@ int main(int argc, char *argv[]) * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly * conservative ratio on x86-64, which can do _more_ KVM_RUNs than * migrations given the 1us+ delay in the migration task. + * + * Another reason why it may have small migration:KVM_RUN ratio is that, + * on systems with large low power mode wakeup latency, it may happen + * quite often that the scheduler is not able to wake up the target CPU + * before the vCPU thread is scheduled to another CPU. */ - TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?", i); + TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), + "Only performed %d KVM_RUNs, task stalled too much?\n\n" + " Try disabling deep sleep states to reduce CPU wakeup latency,\n" + " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" + " or run with -u to disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index 626a2b8a2037..b39033844756 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -7,8 +7,6 @@ * Authors: * Nico Boehr <nrb@linux.ibm.com> */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -18,6 +16,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" #define MAIN_PAGE_COUNT 512 diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 48cb910e660d..f2df7416be84 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -15,6 +15,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" enum mop_target { LOGICAL, diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 43fb25ddc3ec..53def355ccba 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -10,8 +10,6 @@ * * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index c73f948c9b63..7a742a673b7c 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" #define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bd57d991e27d..bb8002084f52 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <pthread.h> #include <sched.h> @@ -221,8 +220,20 @@ static void test_move_memory_region(void) static void guest_code_delete_memory_region(void) { + struct desc_ptr idt; uint64_t val; + /* + * Clobber the IDT so that a #PF due to the memory region being deleted + * escalates to triple-fault shutdown. Because the memory region is + * deleted, there will be no valid mappings. As a result, KVM will + * repeatedly intercepts the state-2 page fault that occurs when trying + * to vector the guest's #PF. I.e. trying to actually handle the #PF + * in the guest will never succeed, and so isn't an option. + */ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + GUEST_SYNC(0); /* Spin until the memory region is deleted. */ @@ -339,7 +350,7 @@ static void test_invalid_memory_region_flags(void) #ifdef __x86_64__ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); else #endif vm = vm_create_barebones(); @@ -462,7 +473,7 @@ static void test_add_private_memory_region(void) pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); @@ -471,7 +482,7 @@ static void test_add_private_memory_region(void) test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); close(memfd); - vm2 = vm_create_barebones_protected_vm(); + vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); @@ -499,7 +510,7 @@ static void test_add_overlapping_private_memory_regions(void) pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index bae0c5026f82..a8d3afa0b86b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -4,20 +4,22 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <stdio.h> #include <time.h> #include <sched.h> #include <pthread.h> #include <linux/kernel.h> #include <asm/kvm.h> -#ifndef __riscv +#ifdef __riscv +#include "sbi.h" +#else #include <asm/kvm_para.h> #endif #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define NR_VCPUS 4 #define ST_GPA_BASE (1 << 30) @@ -83,20 +85,18 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - int i; - pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" steal: %lld\n", st->steal); - pr_info(" version: %d\n", st->version); - pr_info(" flags: %d\n", st->flags); - pr_info(" preempted: %d\n", st->preempted); - pr_info(" u8_pad: "); - for (i = 0; i < 3; ++i) - pr_info("%d", st->u8_pad[i]); - pr_info("\n pad: "); - for (i = 0; i < 11; ++i) - pr_info("%d", st->pad[i]); - pr_info("\n"); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" steal: %lld\n", st->steal); + ksft_print_msg(" version: %d\n", st->version); + ksft_print_msg(" flags: %d\n", st->flags); + ksft_print_msg(" preempted: %d\n", st->preempted); + ksft_print_msg(" u8_pad: %d %d %d\n", + st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]); + ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n", + st->pad[0], st->pad[1], st->pad[2], st->pad[3], + st->pad[4], st->pad[5], st->pad[6], st->pad[7], + st->pad[8], st->pad[9], st->pad[10]); } #elif defined(__aarch64__) @@ -199,10 +199,10 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" rev: %d\n", st->rev); - pr_info(" attr: %d\n", st->attr); - pr_info(" st_time: %ld\n", st->st_time); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" rev: %d\n", st->rev); + ksft_print_msg(" attr: %d\n", st->attr); + ksft_print_msg(" st_time: %ld\n", st->st_time); } #elif defined(__riscv) @@ -366,7 +366,9 @@ int main(int ac, char **av) vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); + ksft_print_header(); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); + ksft_set_plan(NR_VCPUS); /* Run test on each VCPU */ for (i = 0; i < NR_VCPUS; ++i) { @@ -407,14 +409,15 @@ int main(int ac, char **av) run_delay, stolen_time); if (verbose) { - pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i, - guest_stolen_time[i], stolen_time); - if (stolen_time == run_delay) - pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)"); - pr_info("\n"); + ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n", + i, guest_stolen_time[i], stolen_time, + stolen_time == run_delay ? + " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : ""); steal_time_dump(vm, i); } + ksft_test_result_pass("vcpu%d\n", i); } - return 0; + /* Print results and exit() accordingly */ + ksft_finished(); } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index eae521f050e0..903940c54d2d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -6,8 +6,6 @@ * * Tests for amx #NM exception and save/restore. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -246,8 +244,6 @@ int main(int argc, char *argv[]) vcpu_regs_get(vcpu, ®s1); /* Register #NM handler */ - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); /* amx cfg for guest_code */ diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index ee3b384b991c..2929c067c207 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -17,6 +17,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #define VCPUS 2 #define SLOTS 2 diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c index 6c2e5e0ceb1f..81055476d394 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -4,12 +4,9 @@ * * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" - #include "test_util.h" +#include "ucall_common.h" #define MMIO_GPA 0x700000000 #define MMIO_GVA MMIO_GPA diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index f3c2239228b1..762628f7d4ba 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -110,8 +110,6 @@ static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk) { struct kvm_vm *vm = vcpu->vm; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); if (disable_quirk) diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c index df351ae17029..10b1b0ba374e 100644 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c @@ -2,8 +2,6 @@ /* * Copyright (C) 2023, Google LLC. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 5c27efbf405e..4f5881d4ef66 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -7,8 +7,6 @@ * This work is licensed under the terms of the GNU GPL, version 2. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 4c7257ecd2a6..e192720bfe14 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -4,7 +4,6 @@ * * Tests for Enlightened VMCS, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -258,8 +257,6 @@ int main(int argc, char *argv[]) vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index b923a285e96f..068e9c69710d 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -156,9 +156,6 @@ static void guest_test_msrs_access(void) vcpu_init_cpuid(vcpu, prev_cpuid); } - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* TODO: Make this entire test easier to maintain. */ if (stage >= 21) vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); @@ -532,9 +529,6 @@ static void guest_test_hcalls_access(void) while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index f1617762c22f..22c0c124582f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> @@ -256,16 +254,13 @@ int main(int argc, char *argv[]) hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - vm_init_descriptor_tables(vm); vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); - vcpu_init_descriptor_tables(vcpu[1]); vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); vcpu_set_hv_cpuid(vcpu[1]); vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); - vcpu_init_descriptor_tables(vcpu[2]); vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); vcpu_set_hv_cpuid(vcpu[2]); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index c9b18707edc0..b987a3d79715 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -4,7 +4,6 @@ * * Tests for Hyper-V extensions to SVM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index 05b56095cf76..077cd0ec3040 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <asm/barrier.h> #include <pthread.h> #include <inttypes.h> diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 40cc59f4e650..78878b3a2725 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -183,9 +183,6 @@ int main(void) vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - enter_guest(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 853802641e1e..2b550eff35f1 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -75,14 +75,12 @@ int main(int argc, char *argv[]) struct ucall uc; int testcase; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c index 3670331adf21..3eb0313ffa39 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "test_util.h" #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 17bbb96fc4df..e7efb2b35f8b 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -5,9 +5,6 @@ * * Copyright (C) 2022, Google LLC. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <stdint.h> #include <time.h> diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 7cbb409801ee..caad084b8bfd 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -13,10 +13,21 @@ NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_reco NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) +# If we're already root, the host might not have sudo. +if [ $(whoami) == "root" ]; then + function do_sudo () { + "$@" + } +else + function do_sudo () { + sudo "$@" + } +fi + set +e function sudo_echo () { - echo "$1" | sudo tee -a "$2" > /dev/null + echo "$1" | do_sudo tee -a "$2" > /dev/null } NXECUTABLE="$(dirname $0)/nx_huge_pages_test" diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 87011965dc41..eda88080c186 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -9,8 +9,6 @@ * Verifies expected behavior of controlling guest access to * MSR_PLATFORM_INFO. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -26,36 +24,18 @@ static void guest_code(void) { uint64_t msr_platform_info; + uint8_t vector; - for (;;) { - msr_platform_info = rdmsr(MSR_PLATFORM_INFO); - GUEST_SYNC(msr_platform_info); - asm volatile ("inc %r11"); - } -} - -static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + GUEST_SYNC(true); + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - get_ucall(vcpu, &uc); - TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu", uc.cmd); - TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == - MSR_PLATFORM_INFO_MAX_TURBO_RATIO, - "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", - MSR_PLATFORM_INFO_MAX_TURBO_RATIO); -} + GUEST_SYNC(false); + vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info); + GUEST_ASSERT_EQ(vector, GP_VECTOR); -static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu) -{ - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + GUEST_DONE(); } int main(int argc, char *argv[]) @@ -63,6 +43,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t msr_platform_info; + struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); @@ -71,8 +52,26 @@ int main(int argc, char *argv[]) msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_enabled(vcpu); - test_msr_platform_info_disabled(vcpu); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + break; + } + } + +done: vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 26c85815f7e9..96446134c00b 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -2,8 +2,6 @@ /* * Copyright (C) 2023, Tencent, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <x86intrin.h> #include "pmu.h" @@ -21,7 +19,6 @@ static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; -static bool is_forced_emulation_enabled; static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, @@ -31,11 +28,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vm *vm; vm = vm_create_with_one_vcpu(vcpu, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - sync_global_to_guest(vm, kvm_pmu_version); - sync_global_to_guest(vm, is_forced_emulation_enabled); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -630,7 +623,6 @@ int main(int argc, char *argv[]) kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); - is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); test_intel_counters(); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 3c85d1ae9893..26b3e7efe5dd 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -9,9 +9,6 @@ * Verifies the expected behavior of allow lists and deny lists for * virtual PMU events. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "kvm_util.h" #include "pmu.h" #include "processor.h" @@ -337,9 +334,6 @@ static void test_pmu_config_disable(void (*guest_code)(void)) vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); vcpu = vm_vcpu_add(vm, 0, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_ASSERT(!sanity_check_pmu(vcpu), "Guest should not be able to use disabled PMU."); @@ -876,9 +870,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_REQUIRE(sanity_check_pmu(vcpu)); if (use_amd_pmu()) diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c index e0f642d2a3c4..82a8d88b5338 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2022, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <limits.h> #include <pthread.h> diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index 366cf18600bc..d691d86e5bc3 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -4,7 +4,6 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 3610981d9162..c021c0795a96 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -10,7 +10,6 @@ * That bug allowed a user-mode program that called the KVM_SET_SREGS * ioctl to put a VCPU's local APIC into an invalid state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c new file mode 100644 index 000000000000..7a4a61be119b --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kvm.h> +#include <linux/psp-sev.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <stdlib.h> +#include <errno.h> +#include <pthread.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "kselftest.h" + +#define SVM_SEV_FEAT_DEBUG_SWAP 32u + +/* + * Some features may have hidden dependencies, or may only work + * for certain VM types. Err on the side of safety and don't + * expect that all supported features can be passed one by one + * to KVM_SEV_INIT2. + * + * (Well, right now there's only one...) + */ +#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP + +int kvm_fd; +u64 supported_vmsa_features; +bool have_sev_es; + +static int __sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + struct kvm_sev_cmd cmd = { + .id = cmd_id, + .data = (uint64_t)data, + .sev_fd = open_sev_dev_path_or_exit(), + }; + int ret; + + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); + TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, + "%d failed: fw error: %d\n", + cmd_id, cmd.error); + + return ret; +} + +static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == 0, + "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", + ret, errno); + kvm_vm_free(vm); +} + +static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "KVM_SEV_INIT2 should fail, %s.", + msg); + kvm_vm_free(vm); +} + +void test_vm_types(void) +{ + test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); + + /* + * TODO: check that unsupported types cannot be created. Probably + * a separate selftest. + */ + if (have_sev_es) + test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + + test_init2_invalid(0, &(struct kvm_sev_init){}, + "VM type is KVM_X86_DEFAULT_VM"); + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, + "VM type is KVM_X86_SW_PROTECTED_VM"); +} + +void test_flags(uint32_t vm_type) +{ + int i; + + for (i = 0; i < 32; i++) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .flags = BIT(i) }, + "invalid flag"); +} + +void test_features(uint32_t vm_type, uint64_t supported_features) +{ + int i; + + for (i = 0; i < 64; i++) { + if (!(supported_features & (1u << i))) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, + "unknown feature"); + else if (KNOWN_FEATURES & (1u << i)) + test_init2(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); + } +} + +int main(int argc, char *argv[]) +{ + int kvm_fd = open_kvm_dev_path_or_exit(); + bool have_sev; + + TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES) == 0); + kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES, + &supported_vmsa_features); + + have_sev = kvm_cpu_has(X86_FEATURE_SEV); + TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), + "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), + "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + + test_vm_types(); + + test_flags(KVM_X86_SEV_VM); + if (have_sev_es) + test_flags(KVM_X86_SEV_ES_VM); + + test_features(KVM_X86_SEV_VM, 0); + if (have_sev_es) + test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 026779f3ed06..7c70c0da4fb7 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <math.h> #include "test_util.h" #include "kvm_util.h" @@ -13,6 +14,8 @@ #include "sev.h" +#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ @@ -35,13 +38,98 @@ static void guest_sev_code(void) GUEST_DONE(); } +/* Stash state passed via VMSA before any compiled code runs. */ +extern void guest_code_xsave(void); +asm("guest_code_xsave:\n" + "mov $-1, %eax\n" + "mov $-1, %edx\n" + "xsave (%rdi)\n" + "jmp guest_sev_es_code"); + +static void compare_xsave(u8 *from_host, u8 *from_guest) +{ + int i; + bool bad = false; + for (i = 0; i < 4095; i++) { + if (from_host[i] != from_guest[i]) { + printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + bad = true; + } + } + + if (bad) + abort(); +} + +static void test_sync_vmsa(uint32_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t gva; + void *hva; + + double x87val = M_PI; + struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; + struct kvm_sregs sregs; + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = XFEATURE_MASK_X87_AVX, + }; + + vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); + hva = addr_gva2hva(vm, gva); + + vcpu_args_set(vcpu, 1, gva); + + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + + vcpu_xcrs_set(vcpu, &xcrs); + asm("fninit\n" + "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" + "fldl %3\n" + "xsave (%2)\n" + "fstp %%st\n" + : "=m"(xsave) + : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) + : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); + vcpu_xsave_set(vcpu, &xsave); + + vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + + /* This page is shared, so make it decrypted. */ + memset(hva, 0, 4096); + + vcpu_run(vcpu); + + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + + compare_xsave((u8 *)&xsave, (u8 *)hva); + + kvm_vm_free(vm); +} + static void test_sev(void *guest_code, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu); + uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); + + /* TODO: Validate the measurement is as expected. */ + vm_sev_launch(vm, policy, NULL); for (;;) { vcpu_run(vcpu); @@ -82,6 +170,12 @@ int main(int argc, char *argv[]) if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + test_sync_vmsa(0); + test_sync_vmsa(SEV_POLICY_NO_DBG); + } } return 0; diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 416207c38a17..fabeeaddfb3a 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -5,9 +5,6 @@ * Test that KVM emulates instructions in response to EPT violations when * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" #include "test_util.h" @@ -60,9 +57,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR); rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index e18b86666e1f..55c88d664a94 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -4,7 +4,6 @@ * * Tests for SMM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 88b58aab7207..1c756db329e5 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -6,7 +6,6 @@ * * Tests for vCPU state save/restore, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index 32bef39bec21..916e04248fbb 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -93,9 +93,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c index d6fcdcc3af31..00135cbba35e 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -48,12 +48,9 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 0c7ce3d4e83a..7b6481d6c0d3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -152,9 +152,6 @@ static void run_test(bool is_nmi) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); vm_install_exception_handler(vm, INT_NR, guest_int_handler); @@ -166,7 +163,7 @@ static void run_test(bool is_nmi) idt_alt_vm = vm_vaddr_alloc_page(vm); idt_alt = addr_gva2hva(vm, idt_alt_vm); - idt = addr_gva2hva(vm, vm->idt); + idt = addr_gva2hva(vm, vm->arch.idt); memcpy(idt_alt, idt, getpagesize()); } else { idt_alt_vm = 0; diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index adb5593daf48..8fa3948b0170 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -8,8 +8,6 @@ * including requesting an invalid register set, updates to/from values * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index dcbb3c29fb8e..57f157c06b39 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -17,14 +17,11 @@ * delivered into the guest or not. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> #include <string.h> #include <time.h> -#include "kvm_util_base.h" #include "kvm_util.h" #include "mce.h" #include "processor.h" @@ -285,10 +282,6 @@ int main(int argc, char *argv[]) cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(ucna_vcpu); - vcpu_init_descriptor_tables(cmcidis_vcpu); - vcpu_init_descriptor_tables(cmci_vcpu); vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index f4f61a2d2464..32b2794b78fe 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -4,8 +4,6 @@ * * Tests for exiting into userspace on registered MSRs */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "kvm_test_harness.h" @@ -13,8 +11,6 @@ #include "kvm_util.h" #include "vmx.h" -static bool fep_available; - #define MSR_NON_EXISTENT 0x474f4f00 static u64 deny_bits = 0; @@ -258,7 +254,7 @@ static void guest_code_filter_allow(void) GUEST_ASSERT(data == 2); GUEST_ASSERT(guest_exception_count == 0); - if (fep_available) { + if (is_forced_emulation_enabled) { /* Let userspace know we aren't done. */ GUEST_SYNC(0); @@ -520,8 +516,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) uint64_t cmd; int rc; - sync_global_to_guest(vm, fep_available); - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); @@ -531,9 +525,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ @@ -551,7 +542,7 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) vcpu_run(vcpu); cmd = process_ucall(vcpu); - if (fep_available) { + if (is_forced_emulation_enabled) { TEST_ASSERT_EQ(cmd, UCALL_SYNC); vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); @@ -774,7 +765,5 @@ KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL) int main(int argc, char *argv[]) { - fep_available = kvm_is_forced_emulation_enabled(); - return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 977948fd52e6..fa512d033205 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <linux/bitmap.h> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index fad3634fd9eb..3fd6eceab46f 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -115,9 +115,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); /* diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index ea0cb3cae0f7..7c92536551cc 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -10,7 +10,6 @@ * and check it can be retrieved with KVM_GET_MSR, also test * the invalid LBR formats are rejected. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include <linux/bitmap.h> @@ -86,9 +85,6 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code) struct ucall uc; int r, i; - vm_init_descriptor_tables(vcpu->vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); vcpu_args_set(vcpu, 1, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index affc32800158..00dd2ac07a61 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -9,7 +9,6 @@ * value instead of partially decayed timer value * */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 725c206ba0b9..a76078a08ff8 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -19,8 +19,6 @@ * Migration is a command line option. When used on non-numa machines will * exit with error. Test is still usefull on non-numa for testing IPIs. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <getopt.h> #include <pthread.h> #include <inttypes.h> @@ -410,8 +408,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(params[0].vcpu); vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index ab75b873a4ad..69849acd95b0 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 25a0b0db5c3c..95ce192d0753 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -109,9 +109,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - while (1) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index d2ea0435f4f7..a59b3c799bb2 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -125,7 +125,7 @@ struct compat_vcpu_runstate_info { uint32_t state; uint64_t state_entry_time; uint64_t time[5]; -} __attribute__((__packed__));; +} __attribute__((__packed__)); struct arch_vcpu_info { unsigned long cr2; @@ -171,8 +171,9 @@ static volatile bool guest_saw_irq; static void evtchn_handler(struct ex_regs *regs) { struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; - vi->evtchn_upcall_pending = 0; - vi->evtchn_pending_sel = 0; + + vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0); + vcpu_arch_put_guest(vi->evtchn_pending_sel, 0); guest_saw_irq = true; GUEST_SYNC(TEST_GUEST_SAW_IRQ); @@ -380,20 +381,6 @@ wait_for_timer: GUEST_SYNC(TEST_DONE); } -static int cmp_timespec(struct timespec *a, struct timespec *b) -{ - if (a->tv_sec > b->tv_sec) - return 1; - else if (a->tv_sec < b->tv_sec) - return -1; - else if (a->tv_nsec > b->tv_nsec) - return 1; - else if (a->tv_nsec < b->tv_nsec) - return -1; - else - return 0; -} - static struct shared_info *shinfo; static struct vcpu_info *vinfo; static struct kvm_vcpu *vcpu; @@ -449,7 +436,6 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { - struct timespec min_ts, max_ts, vm_ts; struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; @@ -468,8 +454,6 @@ int main(int argc, char *argv[]) bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); - clock_gettime(CLOCK_REALTIME, &min_ts); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Map a region for the shared_info page */ @@ -553,8 +537,6 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); if (do_runstate_tests) { @@ -1010,7 +992,6 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); - clock_gettime(CLOCK_REALTIME, &max_ts); /* * Just a *really* basic check that things are being put in the @@ -1019,6 +1000,8 @@ int main(int argc, char *argv[]) */ struct pvclock_wall_clock *wc; struct pvclock_vcpu_time_info *ti, *ti2; + struct kvm_clock_data kcdata; + long long delta; wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); @@ -1034,12 +1017,34 @@ int main(int argc, char *argv[]) ti2->tsc_shift, ti2->flags); } - vm_ts.tv_sec = wc->sec; - vm_ts.tv_nsec = wc->nsec; TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); - TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); - TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); + + vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); + + if (kcdata.flags & KVM_CLOCK_REALTIME) { + if (verbose) { + printf("KVM_GET_CLOCK clock: %lld.%09lld\n", + kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); + printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", + kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); + } + + delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); + + /* + * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but + * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s + * delta as testing clock accuracy is not the goal here. The test just needs to + * check that the value in shinfo is somewhat sane. + */ + TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, + "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", + wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, + (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); + } else { + pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); + } TEST_ASSERT(ti->version && !(ti->version & 1), "Bad time_info version %x", ti->version); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index 167c97abff1b..f331a4e9bae3 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -4,8 +4,6 @@ * * Tests for the IA32_XSS MSR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "test_util.h" diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index c376151982c4..b175e94e1901 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -7,12 +7,6 @@ ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/) ifeq ($(ARCH),powerpc) -GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") - -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) - -export CFLAGS - SUB_DIRS = alignment \ benchmarks \ cache_shape \ @@ -46,6 +40,7 @@ $(SUB_DIRS): BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all include ../lib.mk +include ./flags.mk override define RUN_TESTS +@for TARGET in $(SUB_DIRS); do \ @@ -57,14 +52,14 @@ endef override define INSTALL_RULE +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install;\ done; endef emit_tests: +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET $@;\ done; override define CLEAN diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 93e9af37449d..66d5d7aaeb20 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index a32a6ab89914..1321922038d0 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -4,10 +4,11 @@ TEST_GEN_FILES := exec_target TEST_FILES := settings -CFLAGS += -O2 - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -O2 $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 689f6c8ebcd8..3a3ca956ac66 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := cache_shape top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 77594e697f2f..42940f92d832 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -1,14 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -m64 -CFLAGS += -I$(CURDIR) -CFLAGS += -D SELFTEST -CFLAGS += -maltivec -CFLAGS += -mcpu=power4 - -# Use our CFLAGS for the implicit .S rule & set the asm machine type -ASFLAGS = $(CFLAGS) -Wa,-mpower4 - TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ @@ -20,6 +10,17 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) +CFLAGS += -D SELFTEST +CFLAGS += -maltivec +CFLAGS += -mcpu=power4 + +# Use our CFLAGS for the implicit .S rule & set the asm machine type +ASFLAGS = $(CFLAGS) -Wa,-mpower4 $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore index b82f45dd46b9..11eefb4b9fa4 100644 --- a/tools/testing/selftests/powerpc/dexcr/.gitignore +++ b/tools/testing/selftests/powerpc/dexcr/.gitignore @@ -1,2 +1,4 @@ +dexcr_test hashchk_test +chdexcr lsdexcr diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile index 76210f2bcec3..58cf9f722905 100644 --- a/tools/testing/selftests/powerpc/dexcr/Makefile +++ b/tools/testing/selftests/powerpc/dexcr/Makefile @@ -1,9 +1,12 @@ -TEST_GEN_PROGS := hashchk_test -TEST_GEN_FILES := lsdexcr +TEST_GEN_PROGS := dexcr_test hashchk_test +TEST_GEN_FILES := lsdexcr chdexcr include ../../lib.mk +include ../flags.mk -$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect) +CFLAGS += $(KHDR_INCLUDES) + +$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie -no-pie $(call cc-option,-mno-rop-protect) $(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c $(TEST_GEN_FILES): ../utils.c ./dexcr.c diff --git a/tools/testing/selftests/powerpc/dexcr/chdexcr.c b/tools/testing/selftests/powerpc/dexcr/chdexcr.c new file mode 100644 index 000000000000..c548d7a5bb9b --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/chdexcr.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> + +#include "dexcr.h" +#include "utils.h" + +static void die(const char *msg) +{ + printf("%s\n", msg); + exit(1); +} + +static void help(void) +{ + printf("Invoke a provided program with a custom DEXCR on-exec reset value\n" + "\n" + "usage: chdexcr [CHDEXCR OPTIONS] -- PROGRAM [ARGS...]\n" + "\n" + "Each configurable DEXCR aspect is exposed as an option.\n" + "\n" + "The normal option sets the aspect in the DEXCR. The --no- variant\n" + "clears that aspect. For example, --ibrtpd sets the IBRTPD aspect bit,\n" + "so indirect branch prediction will be disabled in the provided program.\n" + "Conversely, --no-ibrtpd clears the aspect bit, so indirect branch\n" + "prediction may occur.\n" + "\n" + "CHDEXCR OPTIONS:\n"); + + for (int i = 0; i < ARRAY_SIZE(aspects); i++) { + const struct dexcr_aspect *aspect = &aspects[i]; + + if (aspect->prctl == -1) + continue; + + printf(" --%-6s / --no-%-6s : %s\n", aspect->opt, aspect->opt, aspect->desc); + } +} + +static const struct dexcr_aspect *opt_to_aspect(const char *opt) +{ + for (int i = 0; i < ARRAY_SIZE(aspects); i++) + if (aspects[i].prctl != -1 && !strcmp(aspects[i].opt, opt)) + return &aspects[i]; + + return NULL; +} + +static int apply_option(const char *option) +{ + const struct dexcr_aspect *aspect; + const char *opt = NULL; + const char *set_prefix = "--"; + const char *clear_prefix = "--no-"; + unsigned long ctrl = 0; + int err; + + if (!strcmp(option, "-h") || !strcmp(option, "--help")) { + help(); + exit(0); + } + + /* Strip out --(no-) prefix and determine ctrl value */ + if (!strncmp(option, clear_prefix, strlen(clear_prefix))) { + opt = &option[strlen(clear_prefix)]; + ctrl |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + } else if (!strncmp(option, set_prefix, strlen(set_prefix))) { + opt = &option[strlen(set_prefix)]; + ctrl |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + } + + if (!opt || !*opt) + return 1; + + aspect = opt_to_aspect(opt); + if (!aspect) + die("unknown aspect"); + + err = pr_set_dexcr(aspect->prctl, ctrl); + if (err) + die("failed to apply option"); + + return 0; +} + +int main(int argc, char *const argv[]) +{ + int i; + + if (!dexcr_exists()) + die("DEXCR not detected on this hardware"); + + for (i = 1; i < argc; i++) + if (apply_option(argv[i])) + break; + + if (i < argc && !strcmp(argv[i], "--")) + i++; + + if (i >= argc) + die("missing command"); + + execvp(argv[i], &argv[i]); + perror("execve"); + + return errno; +} diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c index 65ec5347de98..468fd0dc9912 100644 --- a/tools/testing/selftests/powerpc/dexcr/dexcr.c +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c @@ -3,6 +3,7 @@ #include <errno.h> #include <setjmp.h> #include <signal.h> +#include <sys/prctl.h> #include <sys/types.h> #include <sys/wait.h> @@ -43,6 +44,45 @@ out: return exists; } +unsigned int pr_which_to_aspect(unsigned long which) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + return DEXCR_PR_SBHE; + case PR_PPC_DEXCR_IBRTPD: + return DEXCR_PR_IBRTPD; + case PR_PPC_DEXCR_SRAPD: + return DEXCR_PR_SRAPD; + case PR_PPC_DEXCR_NPHIE: + return DEXCR_PR_NPHIE; + default: + FAIL_IF_EXIT_MSG(true, "unknown PR aspect"); + } +} + +int pr_get_dexcr(unsigned long which) +{ + return prctl(PR_PPC_GET_DEXCR, which, 0UL, 0UL, 0UL); +} + +int pr_set_dexcr(unsigned long which, unsigned long ctrl) +{ + return prctl(PR_PPC_SET_DEXCR, which, ctrl, 0UL, 0UL); +} + +bool pr_dexcr_aspect_supported(unsigned long which) +{ + if (pr_get_dexcr(which) == -1) + return errno == ENODEV; + + return true; +} + +bool pr_dexcr_aspect_editable(unsigned long which) +{ + return pr_get_dexcr(which) & PR_PPC_DEXCR_CTRL_EDITABLE; +} + /* * Just test if a bad hashchk triggers a signal, without checking * for support or if the NPHIE aspect is enabled. diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h index f55cbbc8643b..51e9ba3b0997 100644 --- a/tools/testing/selftests/powerpc/dexcr/dexcr.h +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h @@ -9,6 +9,7 @@ #define _SELFTESTS_POWERPC_DEXCR_DEXCR_H #include <stdbool.h> +#include <sys/prctl.h> #include <sys/types.h> #include "reg.h" @@ -26,8 +27,64 @@ #define PPC_RAW_HASHCHK(b, i, a) \ str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));) +struct dexcr_aspect { + const char *name; /* Short display name */ + const char *opt; /* Option name for chdexcr */ + const char *desc; /* Expanded aspect meaning */ + unsigned int index; /* Aspect bit index in DEXCR */ + unsigned long prctl; /* 'which' value for get/set prctl */ +}; + +static const struct dexcr_aspect aspects[] = { + { + .name = "SBHE", + .opt = "sbhe", + .desc = "Speculative branch hint enable", + .index = 0, + .prctl = PR_PPC_DEXCR_SBHE, + }, + { + .name = "IBRTPD", + .opt = "ibrtpd", + .desc = "Indirect branch recurrent target prediction disable", + .index = 3, + .prctl = PR_PPC_DEXCR_IBRTPD, + }, + { + .name = "SRAPD", + .opt = "srapd", + .desc = "Subroutine return address prediction disable", + .index = 4, + .prctl = PR_PPC_DEXCR_SRAPD, + }, + { + .name = "NPHIE", + .opt = "nphie", + .desc = "Non-privileged hash instruction enable", + .index = 5, + .prctl = PR_PPC_DEXCR_NPHIE, + }, + { + .name = "PHIE", + .opt = "phie", + .desc = "Privileged hash instruction enable", + .index = 6, + .prctl = -1, + }, +}; + bool dexcr_exists(void); +bool pr_dexcr_aspect_supported(unsigned long which); + +bool pr_dexcr_aspect_editable(unsigned long which); + +int pr_get_dexcr(unsigned long pr_aspect); + +int pr_set_dexcr(unsigned long pr_aspect, unsigned long ctrl); + +unsigned int pr_which_to_aspect(unsigned long which); + bool hashchk_triggers(void); enum dexcr_source { diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr_test.c b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c new file mode 100644 index 000000000000..7a8657164908 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "dexcr.h" +#include "utils.h" + +/* + * Helper function for testing the behaviour of a newly exec-ed process + */ +static int dexcr_prctl_onexec_test_child(unsigned long which, const char *status) +{ + unsigned long dexcr = mfspr(SPRN_DEXCR_RO); + unsigned long aspect = pr_which_to_aspect(which); + int ctrl = pr_get_dexcr(which); + + if (!strcmp(status, "set")) { + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), + "setting aspect across exec not applied"); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), + "setting aspect across exec not inherited"); + + FAIL_IF_EXIT_MSG(!(aspect & dexcr), "setting aspect across exec did not take effect"); + } else if (!strcmp(status, "clear")) { + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), + "clearing aspect across exec not applied"); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), + "clearing aspect across exec not inherited"); + + FAIL_IF_EXIT_MSG(aspect & dexcr, "clearing aspect across exec did not take effect"); + } else { + FAIL_IF_EXIT_MSG(true, "unknown expected status"); + } + + return 0; +} + +/* + * Test that the given prctl value can be manipulated freely + */ +static int dexcr_prctl_aspect_test(unsigned long which) +{ + unsigned long aspect = pr_which_to_aspect(which); + pid_t pid; + int ctrl; + int err; + int errno_save; + + SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported"); + SKIP_IF_MSG(!pr_dexcr_aspect_supported(which), "DEXCR aspect not supported"); + SKIP_IF_MSG(!pr_dexcr_aspect_editable(which), "DEXCR aspect not editable with prctl"); + + /* We reject invalid combinations of arguments */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR); + errno_save = errno; + FAIL_IF_MSG(err != -1, "simultaneous set and clear should be rejected"); + FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear should be rejected with EINVAL"); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + errno_save = errno; + FAIL_IF_MSG(err != -1, "simultaneous set and clear on exec should be rejected"); + FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear on exec should be rejected with EINVAL"); + + /* We set the aspect */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR, "config value unexpected clear flag"); + FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "setting aspect did not take effect"); + + /* We clear the aspect */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET, "config value unexpected set flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "clearing aspect did not take effect"); + + /* We make it set on exec (doesn't change our current value) */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect should still be cleared"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC, "config value unexpected clear on exec flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "scheduling aspect to set on exec should not change it now"); + + /* We make it clear on exec (doesn't change our current value) */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect config should still be cleared"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC, "config value unexpected set on exec flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should still be cleared"); + + /* We allow setting the current and on-exec value in a single call */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC"); + FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "process aspect should be set"); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should be clear"); + + /* Verify the onexec value is applied across exec */ + pid = fork(); + if (!pid) { + char which_str[32] = {}; + char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "set", NULL }; + unsigned int ctrl = pr_get_dexcr(which); + + sprintf(which_str, "%lu", which); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), + "setting aspect on exec not copied across fork"); + + FAIL_IF_EXIT_MSG(mfspr(SPRN_DEXCR_RO) & aspect, + "setting aspect on exec wrongly applied to fork"); + + execve("/proc/self/exe", args, NULL); + _exit(errno); + } + await_child_success(pid); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + pid = fork(); + if (!pid) { + char which_str[32] = {}; + char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "clear", NULL }; + unsigned int ctrl = pr_get_dexcr(which); + + sprintf(which_str, "%lu", which); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), + "clearing aspect on exec not copied across fork"); + + FAIL_IF_EXIT_MSG(!(mfspr(SPRN_DEXCR_RO) & aspect), + "clearing aspect on exec wrongly applied to fork"); + + execve("/proc/self/exe", args, NULL); + _exit(errno); + } + await_child_success(pid); + + return 0; +} + +static int dexcr_prctl_ibrtpd_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_IBRTPD); +} + +static int dexcr_prctl_srapd_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_SRAPD); +} + +static int dexcr_prctl_nphie_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_NPHIE); +} + +int main(int argc, char *argv[]) +{ + int err = 0; + + /* + * Some tests require checking what happens across exec, so we may be + * invoked as the child of a particular test + */ + if (argc > 1) { + if (argc == 3 && !strcmp(argv[0], "dexcr_prctl_onexec_test_child")) { + unsigned long which; + + err = parse_ulong(argv[1], strlen(argv[1]), &which, 10); + FAIL_IF_MSG(err, "failed to parse which value for child"); + + return dexcr_prctl_onexec_test_child(which, argv[2]); + } + + FAIL_IF_MSG(true, "unknown test case"); + } + + /* + * Otherwise we are the main test invocation and run the full suite + */ + err |= test_harness(dexcr_prctl_ibrtpd_test, "dexcr_prctl_ibrtpd"); + err |= test_harness(dexcr_prctl_srapd_test, "dexcr_prctl_srapd"); + err |= test_harness(dexcr_prctl_nphie_test, "dexcr_prctl_nphie"); + + return err; +} diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c index 7d5658c9ebe4..645224bdc142 100644 --- a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c +++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c @@ -21,8 +21,14 @@ static int require_nphie(void) { SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported"); + + pr_set_dexcr(PR_PPC_DEXCR_NPHIE, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_SET_ONEXEC); + + if (get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE) + return 0; + SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE), - "DEXCR[NPHIE] not enabled"); + "Failed to enable DEXCR[NPHIE]"); return 0; } diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c index 94abbfcc389e..7588929180ab 100644 --- a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c +++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0+ -#include <errno.h> #include <stddef.h> #include <stdio.h> #include <string.h> +#include <sys/prctl.h> #include "dexcr.h" #include "utils.h" @@ -12,40 +12,6 @@ static unsigned int dexcr; static unsigned int hdexcr; static unsigned int effective; -struct dexcr_aspect { - const char *name; - const char *desc; - unsigned int index; -}; - -static const struct dexcr_aspect aspects[] = { - { - .name = "SBHE", - .desc = "Speculative branch hint enable", - .index = 0, - }, - { - .name = "IBRTPD", - .desc = "Indirect branch recurrent target prediction disable", - .index = 3, - }, - { - .name = "SRAPD", - .desc = "Subroutine return address prediction disable", - .index = 4, - }, - { - .name = "NPHIE", - .desc = "Non-privileged hash instruction enable", - .index = 5, - }, - { - .name = "PHIE", - .desc = "Privileged hash instruction enable", - .index = 6, - }, -}; - static void print_list(const char *list[], size_t len) { for (size_t i = 0; i < len; i++) { @@ -60,7 +26,7 @@ static void print_dexcr(char *name, unsigned int bits) const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL}; size_t j = 0; - printf("%s: %08x", name, bits); + printf("%s: 0x%08x", name, bits); if (bits == 0) { printf("\n"); @@ -103,6 +69,63 @@ static void print_aspect(const struct dexcr_aspect *aspect) printf(" \t(%s)\n", aspect->desc); } +static void print_aspect_config(const struct dexcr_aspect *aspect) +{ + const char *reason = NULL; + const char *reason_hyp = NULL; + const char *reason_prctl = "no prctl"; + bool actual = effective & DEXCR_PR_BIT(aspect->index); + bool expected = actual; /* Assume it's fine if we don't expect a specific set/clear value */ + + if (actual) + reason = "set by unknown"; + else + reason = "cleared by unknown"; + + if (aspect->prctl != -1) { + int ctrl = pr_get_dexcr(aspect->prctl); + + if (ctrl < 0) { + reason_prctl = "failed to read prctl"; + } else { + if (ctrl & PR_PPC_DEXCR_CTRL_SET) { + reason_prctl = "set by prctl"; + expected = true; + } else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) { + reason_prctl = "cleared by prctl"; + expected = false; + } else { + reason_prctl = "unknown prctl"; + } + + reason = reason_prctl; + } + } + + if (hdexcr & DEXCR_PR_BIT(aspect->index)) { + reason_hyp = "set by hypervisor"; + reason = reason_hyp; + expected = true; + } else { + reason_hyp = "not modified by hypervisor"; + } + + printf("%12s (%d): %-28s (%s, %s)\n", + aspect->name, + aspect->index, + reason, + reason_hyp, + reason_prctl); + + /* + * The checks are not atomic, so this can technically trigger if the + * hypervisor makes a change while we are checking each source. It's + * far more likely to be a bug if we see this though. + */ + if (actual != expected) + printf(" : ! actual %s does not match config\n", aspect->name); +} + int main(int argc, char *argv[]) { if (!dexcr_exists()) { @@ -114,6 +137,8 @@ int main(int argc, char *argv[]) hdexcr = get_dexcr(HDEXCR); effective = dexcr | hdexcr; + printf("current status:\n"); + print_dexcr(" DEXCR", dexcr); print_dexcr(" HDEXCR", hdexcr); print_dexcr("Effective", effective); @@ -136,6 +161,12 @@ int main(int argc, char *argv[]) else printf("ignored\n"); } + printf("\n"); + + printf("configuration:\n"); + for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) + print_aspect_config(&aspects[i]); + printf("\n"); return 0; } diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 9289d5febe1e..9fa9cb5bd989 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -5,6 +5,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread $(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile index ae963eb2dc5b..70797716f2b5 100644 --- a/tools/testing/selftests/powerpc/eeh/Makefile +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -7,3 +7,4 @@ TEST_FILES := eeh-functions.sh settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk new file mode 100644 index 000000000000..b909bee3cb2a --- /dev/null +++ b/tools/testing/selftests/powerpc/flags.mk @@ -0,0 +1,12 @@ +#This checks for any ENV variables and add those. + +ifeq ($(GIT_VERSION),) +GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") +export GIT_VERSION +endif + +ifeq ($(CFLAGS),) +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(CFLAGS) +export CFLAGS +endif + diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 3948f7c510aa..b14fd2e0c6a8 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c $(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile index 2424513982d9..ce4ed679aaaf 100644 --- a/tools/testing/selftests/powerpc/mce/Makefile +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := inject-ra-err include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 4a6608beef0e..aab058ecb352 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -13,6 +13,7 @@ TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index 0785c2e99d40..480d8ba94cf7 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -1,8 +1,9 @@ -CFLAGS = -O3 -m64 -I./include -I../include - TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk +include ../flags.mk + +CFLAGS = -O3 -m64 -I./include -I../include $(TEST_GEN_FILES): gzip_vas.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile index e899712d49db..406429499022 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/Makefile +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := attr_test top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk -$(TEST_GEN_PROGS): ../harness.c ../utils.c
\ No newline at end of file +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile index 7f79e437634a..fed4f2414dbf 100644 --- a/tools/testing/selftests/powerpc/papr_sysparm/Makefile +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_sysparm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile index 06b719703bfd..b09852e40882 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/Makefile +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_vpd top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index a284fa874a9f..7e9dbf3d0d09 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -7,8 +7,11 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests +SUB_DIRS := ebb sampling_tests event_code_tests + +all: $(TEST_GEN_PROGS) $(SUB_DIRS) $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -22,12 +25,16 @@ $(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES) $(OUTPUT)/per_event_excludes: ../utils.c +$(SUB_DIRS): + BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all + DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests; \ + done; endef emit_tests: @@ -35,34 +42,29 @@ emit_tests: BASENAME_TEST=`basename $$TEST`; \ echo "$(COLLECTION):$$BASENAME_TEST"; \ done - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET emit_tests; \ + done; DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install; \ + done; endef DEFAULT_CLEAN := $(CLEAN) override define CLEAN $(DEFAULT_CLEAN) $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ + done; endef -ebb: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -sampling_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -event_code_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all .PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 010160690227..1b39af7c10db 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,16 +4,6 @@ include ../../../../../build/Build.include noarg: $(MAKE) -C ../../ -# The EBB handler is 64-bit code and everything links against it -CFLAGS += -m64 - -TMPOUT = $(OUTPUT)/TMPDIR/ -# Toolchains may build PIE by default which breaks the assembly -no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -LDFLAGS += $(no-pie-option) - TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ @@ -28,6 +18,17 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +# The EBB handler is 64-bit code and everything links against it +CFLAGS += -m64 + +TMPOUT = $(OUTPUT)/TMPDIR/ +# Toolchains may build PIE by default which breaks the assembly +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += $(no-pie-option) $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ ebb.c ebb_handler.S trace.c busy_loop.S diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 4e07d7046457..fdb080b3fa65 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ @@ -11,5 +9,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +CFLAGS += -m64 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 9e67351fb252..9f79bec5fce7 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ @@ -11,5 +9,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +CFLAGS += -m64 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 9b9491a63213..23bd9a7590dd 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -I$(CURDIR) - TEST_GEN_PROGS := load_unaligned_zeropad top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -I$(CURDIR) $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 1b39b86849da..59ca01d8567e 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -26,6 +26,7 @@ LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index e0d979ab0204..33286039724a 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -5,9 +5,10 @@ TEST_PROGS := mitigation-patching.sh top_srcdir = ../../../../.. -CFLAGS += $(KHDR_INCLUDES) - include ../../lib.mk +include ../flags.mk + +CFLAGS += $(KHDR_INCLUDES) $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index f679d260afc8..ece95bd52be9 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -3,7 +3,6 @@ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart TEST_GEN_PROGS += sigreturn_kernel TEST_GEN_PROGS += sigreturn_unaligned -CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 @@ -11,5 +10,8 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -maltivec $(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 9c39f55a58ff..4c9d9a58c9d1 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -I$(CURDIR) - EXTRA_SOURCES := ../harness.c build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi) @@ -27,9 +24,13 @@ $(OUTPUT)/strlen_32: CFLAGS += -m32 TEST_GEN_PROGS += strlen_32 endif -ASFLAGS = $(CFLAGS) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +# The loops are all 64-bit code +CFLAGS += -I$(CURDIR) + +ASFLAGS = $(CFLAGS) $(TEST_GEN_PROGS): $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index bdc081afedb0..0da2e0a74264 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := switch_endian_test -ASFLAGS += -O2 -Wall -g -nostdlib -m64 - EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 $(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index ee1740ddfb0c..3bc07af88f0e 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := ipc_unmuxed rtas_filter -CFLAGS += $(KHDR_INCLUDES) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += $(KHDR_INCLUDES) $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 3876805c2f31..f13f0ab36007 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -11,6 +11,7 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index cf65cbf33085..61d519a076c6 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := test-vphn -CFLAGS += -m64 -I$(CURDIR) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -m64 -I$(CURDIR) $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/ring-buffer/.gitignore b/tools/testing/selftests/ring-buffer/.gitignore new file mode 100644 index 000000000000..3aed1a2a6c67 --- /dev/null +++ b/tools/testing/selftests/ring-buffer/.gitignore @@ -0,0 +1 @@ +map_test diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile new file mode 100644 index 000000000000..627c5fa6d1ab --- /dev/null +++ b/tools/testing/selftests/ring-buffer/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS = map_test + +include ../lib.mk diff --git a/tools/testing/selftests/ring-buffer/config b/tools/testing/selftests/ring-buffer/config new file mode 100644 index 000000000000..d936f8f00e78 --- /dev/null +++ b/tools/testing/selftests/ring-buffer/config @@ -0,0 +1,2 @@ +CONFIG_FTRACE=y +CONFIG_TRACER_SNAPSHOT=y diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c new file mode 100644 index 000000000000..a9006fa7097e --- /dev/null +++ b/tools/testing/selftests/ring-buffer/map_test.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ring-buffer memory mapping tests + * + * Copyright (c) 2024 Vincent Donnefort <vdonnefort@google.com> + */ +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/trace_mmap.h> + +#include <sys/mman.h> +#include <sys/ioctl.h> + +#include "../user_events/user_events_selftests.h" /* share tracefs setup */ +#include "../kselftest_harness.h" + +#define TRACEFS_ROOT "/sys/kernel/tracing" + +static int __tracefs_write(const char *path, const char *value) +{ + int fd, ret; + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + return fd; + + ret = write(fd, value, strlen(value)); + + close(fd); + + return ret == -1 ? -errno : 0; +} + +static int __tracefs_write_int(const char *path, int value) +{ + char *str; + int ret; + + if (asprintf(&str, "%d", value) < 0) + return -1; + + ret = __tracefs_write(path, str); + + free(str); + + return ret; +} + +#define tracefs_write_int(path, value) \ + ASSERT_EQ(__tracefs_write_int((path), (value)), 0) + +#define tracefs_write(path, value) \ + ASSERT_EQ(__tracefs_write((path), (value)), 0) + +static int tracefs_reset(void) +{ + if (__tracefs_write_int(TRACEFS_ROOT"/tracing_on", 0)) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/trace", "")) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/set_event", "")) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/current_tracer", "nop")) + return -1; + + return 0; +} + +struct tracefs_cpu_map_desc { + struct trace_buffer_meta *meta; + int cpu_fd; +}; + +int tracefs_cpu_map(struct tracefs_cpu_map_desc *desc, int cpu) +{ + int page_size = getpagesize(); + char *cpu_path; + void *map; + + if (asprintf(&cpu_path, + TRACEFS_ROOT"/per_cpu/cpu%d/trace_pipe_raw", + cpu) < 0) + return -ENOMEM; + + desc->cpu_fd = open(cpu_path, O_RDONLY | O_NONBLOCK); + free(cpu_path); + if (desc->cpu_fd < 0) + return -ENODEV; + + map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0); + if (map == MAP_FAILED) + return -errno; + + desc->meta = (struct trace_buffer_meta *)map; + + return 0; +} + +void tracefs_cpu_unmap(struct tracefs_cpu_map_desc *desc) +{ + munmap(desc->meta, desc->meta->meta_page_size); + close(desc->cpu_fd); +} + +FIXTURE(map) { + struct tracefs_cpu_map_desc map_desc; + bool umount; +}; + +FIXTURE_VARIANT(map) { + int subbuf_size; +}; + +FIXTURE_VARIANT_ADD(map, subbuf_size_4k) { + .subbuf_size = 4, +}; + +FIXTURE_VARIANT_ADD(map, subbuf_size_8k) { + .subbuf_size = 8, +}; + +FIXTURE_SETUP(map) +{ + int cpu = sched_getcpu(); + cpu_set_t cpu_mask; + bool fail, umount; + char *message; + + if (getuid() != 0) + SKIP(return, "Skipping: %s", "Please run the test as root"); + + if (!tracefs_enabled(&message, &fail, &umount)) { + if (fail) { + TH_LOG("Tracefs setup failed: %s", message); + ASSERT_FALSE(fail); + } + SKIP(return, "Skipping: %s", message); + } + + self->umount = umount; + + ASSERT_GE(cpu, 0); + + ASSERT_EQ(tracefs_reset(), 0); + + tracefs_write_int(TRACEFS_ROOT"/buffer_subbuf_size_kb", variant->subbuf_size); + + ASSERT_EQ(tracefs_cpu_map(&self->map_desc, cpu), 0); + + /* + * Ensure generated events will be found on this very same ring-buffer. + */ + CPU_ZERO(&cpu_mask); + CPU_SET(cpu, &cpu_mask); + ASSERT_EQ(sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask), 0); +} + +FIXTURE_TEARDOWN(map) +{ + tracefs_reset(); + + if (self->umount) + tracefs_unmount(); + + tracefs_cpu_unmap(&self->map_desc); +} + +TEST_F(map, meta_page_check) +{ + struct tracefs_cpu_map_desc *desc = &self->map_desc; + int cnt = 0; + + ASSERT_EQ(desc->meta->entries, 0); + ASSERT_EQ(desc->meta->overrun, 0); + ASSERT_EQ(desc->meta->read, 0); + + ASSERT_EQ(desc->meta->reader.id, 0); + ASSERT_EQ(desc->meta->reader.read, 0); + + ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0); + ASSERT_EQ(desc->meta->reader.id, 0); + + tracefs_write_int(TRACEFS_ROOT"/tracing_on", 1); + for (int i = 0; i < 16; i++) + tracefs_write_int(TRACEFS_ROOT"/trace_marker", i); +again: + ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0); + + ASSERT_EQ(desc->meta->entries, 16); + ASSERT_EQ(desc->meta->overrun, 0); + ASSERT_EQ(desc->meta->read, 16); + + ASSERT_EQ(desc->meta->reader.id, 1); + + if (!(cnt++)) + goto again; +} + +TEST_F(map, data_mmap) +{ + struct tracefs_cpu_map_desc *desc = &self->map_desc; + unsigned long meta_len, data_len; + void *data; + + meta_len = desc->meta->meta_page_size; + data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs; + + /* Map all the available subbufs */ + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_NE(data, MAP_FAILED); + munmap(data, data_len); + + /* Map all the available subbufs - 1 */ + data_len -= desc->meta->subbuf_size; + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_NE(data, MAP_FAILED); + munmap(data, data_len); + + /* Overflow the available subbufs by 1 */ + meta_len += desc->meta->subbuf_size * 2; + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_EQ(data, MAP_FAILED); +} + +FIXTURE(snapshot) { + bool umount; +}; + +FIXTURE_SETUP(snapshot) +{ + bool fail, umount; + struct stat sb; + char *message; + + if (getuid() != 0) + SKIP(return, "Skipping: %s", "Please run the test as root"); + + if (stat(TRACEFS_ROOT"/snapshot", &sb)) + SKIP(return, "Skipping: %s", "snapshot not available"); + + if (!tracefs_enabled(&message, &fail, &umount)) { + if (fail) { + TH_LOG("Tracefs setup failed: %s", message); + ASSERT_FALSE(fail); + } + SKIP(return, "Skipping: %s", message); + } + + self->umount = umount; +} + +FIXTURE_TEARDOWN(snapshot) +{ + __tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "!snapshot"); + tracefs_reset(); + + if (self->umount) + tracefs_unmount(); +} + +TEST_F(snapshot, excludes_map) +{ + struct tracefs_cpu_map_desc map_desc; + int cpu = sched_getcpu(); + + ASSERT_GE(cpu, 0); + tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "snapshot"); + ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), -EBUSY); +} + +TEST_F(snapshot, excluded_by_map) +{ + struct tracefs_cpu_map_desc map_desc; + int cpu = sched_getcpu(); + + ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), 0); + + ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "snapshot"), -EBUSY); + ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/snapshot", + "1"), -EBUSY); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index dcd7509fe2e0..0bb46793dcd4 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -261,6 +261,12 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); + /* Register without separator spacing should still match */ + reg.enable_bit = 29; + reg.name_args = (__u64)"__test_event u32 field1;u32 field2"; + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + /* Multiple registers to same name but different args should fail */ reg.enable_bit = 29; reg.name_args = (__u64)"__test_event u32 field1;"; @@ -288,6 +294,8 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); unreg.disable_bit = 30; ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); + unreg.disable_bit = 29; + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); /* Delete should have been auto-done after close and unregister */ close(self->data_fd); diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 507555714b1d..f314d3789f17 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y CONFIG_BUG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y CONFIG_JUMP_LABEL=y -CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_SHMEM=y CONFIG_SLUB=y diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 6d4ba77847b6..0b7ecfb30d19 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -3,7 +3,7 @@ STOP_ERROR := LIBTRACEEVENT_MIN_VERSION = 1.5 -LIBTRACEFS_MIN_VERSION = 1.3 +LIBTRACEFS_MIN_VERSION = 1.6 define lib_setup $(eval LIB_INCLUDES += $(shell sh -c "$(PKG_CONFIG) --cflags lib$(1)")) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 01870d50942a..7be17d09f7e8 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -36,13 +36,14 @@ struct osnoise_hist_params { cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; - char no_header; char no_summary; char no_index; char with_zeros; int bucket_size; int entries; + int warmup; + int buffer_size; }; struct osnoise_hist_cpu { @@ -436,9 +437,9 @@ static void osnoise_hist_usage(char *usage) static const char * const msg[] = { "", " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", - " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-T us] [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", - " [--no-index] [--with-zeros] [-C[=cgroup_name]]", + " [--no-index] [--with-zeros] [-C[=cgroup_name]] [--warm-up]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -452,7 +453,7 @@ static void osnoise_hist_usage(char *usage) " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -t/--trace[file]: save the stopped trace to [file|osnoise_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", @@ -468,6 +469,8 @@ static void osnoise_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " --warm-up: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", NULL, }; @@ -531,13 +534,15 @@ static struct osnoise_hist_params {"with-zeros", no_argument, 0, '3'}, {"trigger", required_argument, 0, '4'}, {"filter", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:6:7:", long_options, &option_index); /* detect the end of the options. */ @@ -640,9 +645,13 @@ static struct osnoise_hist_params params->threshold = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; + if (optarg) { + if (optarg[0] == '=') + params->trace_output = &optarg[1]; + else + params->trace_output = &optarg[0]; + } else if (optind < argc && argv[optind][0] != '0') + params->trace_output = argv[optind]; else params->trace_output = "osnoise_trace.txt"; break; @@ -680,6 +689,12 @@ static struct osnoise_hist_params osnoise_hist_usage("--filter requires a previous -e\n"); } break; + case '6': + params->warmup = get_llong_from_str(optarg); + break; + case '7': + params->buffer_size = get_llong_from_str(optarg); + break; default: osnoise_hist_usage("Invalid option"); } @@ -886,6 +901,11 @@ int osnoise_hist_main(int argc, char *argv[]) goto out_hist; } + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&record->trace, params->buffer_size); + if (retval) + goto out_hist; + } } /* @@ -899,6 +919,25 @@ int osnoise_hist_main(int argc, char *argv[]) trace_instance_start(&record->trace); trace_instance_start(trace); + if (params->warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->warmup); + sleep(params->warmup); + if (stop_tracing) + goto out_hist; + + /* + * Clean up the buffer. The osnoise workload do not run + * with tracing off to avoid creating a performance penalty + * when not needed. + */ + retval = tracefs_instance_file_write(trace->inst, "trace", ""); + if (retval < 0) { + debug_msg("Error cleaning up the buffer"); + goto out_hist; + } + + } + tool->start_time = time(NULL); osnoise_hist_set_signals(params); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 457360db0767..07ba55d4ec06 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -40,6 +40,8 @@ struct osnoise_top_params { int set_sched; int cgroup; int hk_cpus; + int warmup; + int buffer_size; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; @@ -281,8 +283,8 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) static const char * const msg[] = { " [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", - " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", - " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]]", + " [-T us] [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", + " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]] [--warm-up s]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -296,7 +298,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -t/--trace[file]: save the stopped trace to [file|osnoise_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", @@ -307,6 +309,8 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", NULL, }; @@ -381,13 +385,15 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) {"trace", optional_argument, 0, 't'}, {"trigger", required_argument, 0, '0'}, {"filter", required_argument, 0, '1'}, + {"warm-up", required_argument, 0, '2'}, + {"trace-buffer-size", required_argument, 0, '3'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:", + c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:2:3:", long_options, &option_index); /* Detect the end of the options. */ @@ -480,9 +486,13 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) params->stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; + if (optarg) { + if (optarg[0] == '=') + params->trace_output = &optarg[1]; + else + params->trace_output = &optarg[0]; + } else if (optind < argc && argv[optind][0] != '-') + params->trace_output = argv[optind]; else params->trace_output = "osnoise_trace.txt"; break; @@ -511,6 +521,12 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) osnoise_top_usage(params, "--filter requires a previous -e\n"); } break; + case '2': + params->warmup = get_llong_from_str(optarg); + break; + case '3': + params->buffer_size = get_llong_from_str(optarg); + break; default: osnoise_top_usage(params, "Invalid option"); } @@ -719,6 +735,12 @@ int osnoise_top_main(int argc, char **argv) if (retval) goto out_top; } + + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&record->trace, params->buffer_size); + if (retval) + goto out_top; + } } /* @@ -732,6 +754,25 @@ int osnoise_top_main(int argc, char **argv) trace_instance_start(&record->trace); trace_instance_start(trace); + if (params->warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->warmup); + sleep(params->warmup); + if (stop_tracing) + goto out_top; + + /* + * Clean up the buffer. The osnoise workload do not run + * with tracing off to avoid creating a performance penalty + * when not needed. + */ + retval = tracefs_instance_file_write(trace->inst, "trace", ""); + if (retval < 0) { + debug_msg("Error cleaning up the buffer"); + goto out_top; + } + + } + tool->start_time = time(NULL); osnoise_top_set_signals(params); diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index 7093fd5333be..7bd80ee2a5b4 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -16,6 +16,9 @@ enum timelat_state { TIMERLAT_WAITING_THREAD, }; +/* Used to fill spaces in the output */ +static const char *spaces = " "; + #define MAX_COMM 24 /* @@ -274,14 +277,17 @@ static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *recor taa_data->prev_irq_timstamp = start; trace_seq_reset(taa_data->prev_irqs_seq); - trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n", - "nmi", ns_to_usf(duration)); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, + ns_to_usf(duration)); return 0; } taa_data->thread_nmi_sum += duration; - trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n", - "nmi", ns_to_usf(duration)); + trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, ns_to_usf(duration)); return 0; } @@ -323,8 +329,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor taa_data->prev_irq_timstamp = start; trace_seq_reset(taa_data->prev_irqs_seq); - trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n", - desc, vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 15, spaces, + ns_to_usf(duration)); return 0; } @@ -372,8 +380,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor * IRQ interference. */ taa_data->thread_irq_sum += duration; - trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n", - desc, vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 24, spaces, + ns_to_usf(duration)); return 0; } @@ -408,8 +418,10 @@ static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *r taa_data->thread_softirq_sum += duration; - trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n", - softirq_name[vector], vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + softirq_name[vector], vector, + 24, spaces, + ns_to_usf(duration)); return 0; } @@ -452,8 +464,10 @@ static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *re } else { taa_data->thread_thread_sum += duration; - trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n", - comm, pid, ns_to_usf(duration)); + trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", + comm, pid, + 15, spaces, + ns_to_usf(duration)); } return 0; @@ -482,7 +496,8 @@ static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *rec function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); if (!function) break; - trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function); + trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", + 14, spaces, function); } } return 0; @@ -568,23 +583,24 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) - printf(" Previous IRQ interference: \t\t up to %9.2f us\n", - ns_to_usf(taa_data->prev_irq_duration)); + printf(" Previous IRQ interference: %.*s up to %9.2f us\n", + 16, spaces, + ns_to_usf(taa_data->prev_irq_duration)); } /* * The delay that the IRQ suffered before starting. */ - printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n", - (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", - ns_to_usf(taa_data->timer_irq_start_delay), - ns_to_per(total, taa_data->timer_irq_start_delay)); + printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, + (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", + ns_to_usf(taa_data->timer_irq_start_delay), + ns_to_per(total, taa_data->timer_irq_start_delay)); /* * Timerlat IRQ. */ - printf(" IRQ latency: \t\t\t\t %9.2f us\n", - ns_to_usf(taa_data->tlat_irq_latency)); + printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, + ns_to_usf(taa_data->tlat_irq_latency)); if (irq) { /* @@ -595,15 +611,16 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * so it will be displayed, it is the key. */ printf(" Blocking thread:\n"); - printf(" %24s:%-9llu\n", - taa_data->run_thread_comm, taa_data->run_thread_pid); + printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, + taa_data->run_thread_pid); } else { /* * The duration of the IRQ handler that handled the timerlat IRQ. */ - printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->timer_irq_duration), - ns_to_per(total, taa_data->timer_irq_duration)); + printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", + 30, spaces, + ns_to_usf(taa_data->timer_irq_duration), + ns_to_per(total, taa_data->timer_irq_duration)); /* * The amount of time that the current thread postponed the scheduler. @@ -611,13 +628,13 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Recalling that it is net from NMI/IRQ/Softirq interference, so there * is no need to compute values here. */ - printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_blocking_duration), - ns_to_per(total, taa_data->thread_blocking_duration)); + printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_blocking_duration), + ns_to_per(total, taa_data->thread_blocking_duration)); - printf(" %24s:%-9llu %9.2f us\n", - taa_data->run_thread_comm, taa_data->run_thread_pid, - ns_to_usf(taa_data->thread_blocking_duration)); + printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, + taa_data->run_thread_comm, taa_data->run_thread_pid, + 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); } /* @@ -629,9 +646,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * NMIs can happen during the IRQ, so they are always possible. */ if (taa_data->thread_nmi_sum) - printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_nmi_sum), - ns_to_per(total, taa_data->thread_nmi_sum)); + printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_nmi_sum), + ns_to_per(total, taa_data->thread_nmi_sum)); /* * If it is an IRQ latency, the other factors can be skipped. @@ -643,9 +660,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Prints the interference caused by IRQs to the thread latency. */ if (taa_data->thread_irq_sum) { - printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_irq_sum), - ns_to_per(total, taa_data->thread_irq_sum)); + printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_irq_sum), + ns_to_per(total, taa_data->thread_irq_sum)); trace_seq_do_printf(taa_data->irqs_seq); } @@ -654,9 +671,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Prints the interference caused by Softirqs to the thread latency. */ if (taa_data->thread_softirq_sum) { - printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_softirq_sum), - ns_to_per(total, taa_data->thread_softirq_sum)); + printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, + ns_to_usf(taa_data->thread_softirq_sum), + ns_to_per(total, taa_data->thread_softirq_sum)); trace_seq_do_printf(taa_data->softirqs_seq); } @@ -670,9 +687,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * timer handling latency. */ if (taa_data->thread_thread_sum) { - printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_thread_sum), - ns_to_per(total, taa_data->thread_thread_sum)); + printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, + ns_to_usf(taa_data->thread_thread_sum), + ns_to_per(total, taa_data->thread_thread_sum)); trace_seq_do_printf(taa_data->threads_seq); } @@ -682,8 +699,8 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, */ print_total: printf("------------------------------------------------------------------------\n"); - printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread", - ns_to_usf(total)); + printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", + 37, spaces, ns_to_usf(total)); } static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 8bd51aab6513..a3907c390d67 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -40,6 +40,7 @@ struct timerlat_hist_params { int no_aa; int dump_tasks; int user_workload; + int kernel_workload; int user_hist; cpu_set_t hk_cpu_set; struct sched_attr sched_param; @@ -52,6 +53,8 @@ struct timerlat_hist_params { char with_zeros; int bucket_size; int entries; + int warmup; + int buffer_size; }; struct timerlat_hist_cpu { @@ -324,17 +327,29 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + else + trace_seq_printf(trace->seq, " - "); + } - if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_thread); + else + trace_seq_printf(trace->seq, " - "); + } - if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_user); + if (params->user_hist) { + if (data->hist[cpu].user_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_user); + else + trace_seq_printf(trace->seq, " - "); + } } trace_seq_printf(trace->seq, "\n"); @@ -384,25 +399,164 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + else + trace_seq_printf(trace->seq, " - "); + } - if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + else + trace_seq_printf(trace->seq, " - "); + } - if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_user); + if (params->user_hist) { + if (data->hist[cpu].user_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_user); + else + trace_seq_printf(trace->seq, " - "); + } } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); } +static void +timerlat_print_stats_all(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + struct timerlat_hist_cpu *cpu_data; + struct timerlat_hist_cpu sum; + int cpu; + + if (params->no_summary) + return; + + memset(&sum, 0, sizeof(sum)); + sum.min_irq = ~0; + sum.min_thread = ~0; + sum.min_user = ~0; + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + cpu_data = &data->hist[cpu]; + + sum.irq_count += cpu_data->irq_count; + update_min(&sum.min_irq, &cpu_data->min_irq); + update_sum(&sum.sum_irq, &cpu_data->sum_irq); + update_max(&sum.max_irq, &cpu_data->max_irq); + + sum.thread_count += cpu_data->thread_count; + update_min(&sum.min_thread, &cpu_data->min_thread); + update_sum(&sum.sum_thread, &cpu_data->sum_thread); + update_max(&sum.max_thread, &cpu_data->max_thread); + + sum.user_count += cpu_data->user_count; + update_min(&sum.min_user, &cpu_data->min_user); + update_sum(&sum.sum_user, &cpu_data->sum_user); + update_max(&sum.max_user, &cpu_data->max_user); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "ALL: "); + + if (!params->no_irq) + trace_seq_printf(trace->seq, " IRQ"); + + if (!params->no_thread) + trace_seq_printf(trace->seq, " Thr"); + + if (params->user_hist) + trace_seq_printf(trace->seq, " Usr"); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + sum.irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + sum.thread_count); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + sum.user_count); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + sum.min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + sum.min_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + sum.min_user); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + sum.sum_irq / sum.irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + sum.sum_thread / sum.thread_count); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + sum.sum_user / sum.user_count); + + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + sum.max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + sum.max_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + sum.max_user); + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + /* - * timerlat_print_stats - print data for all CPUs + * timerlat_print_stats - print data for each CPUs */ static void timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) @@ -485,6 +639,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t trace_seq_reset(trace->seq); timerlat_print_summary(params, trace, data); + timerlat_print_stats_all(params, trace, data); } /* @@ -497,9 +652,10 @@ static void timerlat_hist_usage(char *usage) char *msg[] = { "", " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", - " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", - " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]", + " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", + " [--warm-up s]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -513,7 +669,7 @@ static void timerlat_hist_usage(char *usage) " -d/--duration time[m|h|d]: duration of the session in seconds", " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -D/--debug: print debug info", - " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", @@ -534,8 +690,11 @@ static void timerlat_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", - " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", NULL, }; @@ -597,6 +756,7 @@ static struct timerlat_hist_params {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, {"user-load", no_argument, 0, 'U'}, {"event", required_argument, 0, 'e'}, {"no-irq", no_argument, 0, '0'}, @@ -610,13 +770,15 @@ static struct timerlat_hist_params {"dma-latency", required_argument, 0, '8'}, {"no-aa", no_argument, 0, '9'}, {"dump-task", no_argument, 0, '\1'}, + {"warm-up", required_argument, 0, '\2'}, + {"trace-buffer-size", required_argument, 0, '\3'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:uU0123456:7:8:9\1", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3", long_options, &option_index); /* detect the end of the options. */ @@ -699,6 +861,9 @@ static struct timerlat_hist_params case 'i': params->stop_us = get_llong_from_str(optarg); break; + case 'k': + params->kernel_workload = 1; + break; case 'n': params->output_divisor = 1; break; @@ -720,9 +885,13 @@ static struct timerlat_hist_params params->stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; + if (optarg) { + if (optarg[0] == '=') + params->trace_output = &optarg[1]; + else + params->trace_output = &optarg[0]; + } else if (optind < argc && argv[optind][0] != '-') + params->trace_output = argv[optind]; else params->trace_output = "timerlat_trace.txt"; break; @@ -785,6 +954,12 @@ static struct timerlat_hist_params case '\1': params->dump_tasks = 1; break; + case '\2': + params->warmup = get_llong_from_str(optarg); + break; + case '\3': + params->buffer_size = get_llong_from_str(optarg); + break; default: timerlat_hist_usage("Invalid option"); } @@ -807,6 +982,9 @@ static struct timerlat_hist_params if (!params->stop_us && !params->stop_total_us) params->no_aa = 1; + if (params->kernel_workload && params->user_workload) + timerlat_hist_usage("--kernel-threads and --user-threads are mutually exclusive!"); + return params; } @@ -882,6 +1060,22 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param auto_house_keeping(¶ms->monitored_cpus); } + /* + * If the user did not specify a type of thread, try user-threads first. + * Fall back to kernel threads otherwise. + */ + if (!params->kernel_workload && !params->user_workload) { + retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); + if (retval) { + debug_msg("User-space interface detected, setting user-threads\n"); + params->user_workload = 1; + params->user_hist = 1; + } else { + debug_msg("User-space interface not detected, setting kernel-threads\n"); + params->kernel_workload = 1; + } + } + if (params->user_hist) { retval = osnoise_set_workload(tool->context, 0); if (retval) { @@ -1019,6 +1213,12 @@ int timerlat_hist_main(int argc, char *argv[]) if (retval) goto out_hist; } + + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&record->trace, params->buffer_size); + if (retval) + goto out_hist; + } } if (!params->no_aa) { @@ -1039,22 +1239,6 @@ int timerlat_hist_main(int argc, char *argv[]) } } - /* - * Start the tracers here, after having set all instances. - * - * Let the trace instance start first for the case of hitting a stop - * tracing while enabling other instances. The trace instance is the - * one with most valuable information. - */ - if (params->trace_output) - trace_instance_start(&record->trace); - if (!params->no_aa) - trace_instance_start(&aa->trace); - trace_instance_start(trace); - - tool->start_time = time(NULL); - timerlat_hist_set_signals(params); - if (params->user_workload) { /* rtla asked to stop */ params_u.should_run = 1; @@ -1074,6 +1258,29 @@ int timerlat_hist_main(int argc, char *argv[]) err_msg("Error creating timerlat user-space threads\n"); } + if (params->warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->warmup); + sleep(params->warmup); + if (stop_tracing) + goto out_hist; + } + + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + while (!stop_tracing) { sleep(params->sleep_time); @@ -1099,6 +1306,7 @@ int timerlat_hist_main(int argc, char *argv[]) } } } + if (params->user_workload && !params_u.stopped_running) { params_u.should_run = 0; sleep(1); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 8a3fa64319c6..8c16419fe22a 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -44,6 +44,10 @@ struct timerlat_top_params { int hk_cpus; int user_top; int user_workload; + int kernel_workload; + int pretty_output; + int warmup; + int buffer_size; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; @@ -118,6 +122,37 @@ cleanup: return NULL; } +static void +timerlat_top_reset_sum(struct timerlat_top_cpu *summary) +{ + memset(summary, 0, sizeof(*summary)); + summary->min_irq = ~0; + summary->min_thread = ~0; + summary->min_user = ~0; +} + +static void +timerlat_top_update_sum(struct osnoise_tool *tool, int cpu, struct timerlat_top_cpu *sum) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + sum->irq_count += cpu_data->irq_count; + update_min(&sum->min_irq, &cpu_data->min_irq); + update_sum(&sum->sum_irq, &cpu_data->sum_irq); + update_max(&sum->max_irq, &cpu_data->max_irq); + + sum->thread_count += cpu_data->thread_count; + update_min(&sum->min_thread, &cpu_data->min_thread); + update_sum(&sum->sum_thread, &cpu_data->sum_thread); + update_max(&sum->max_thread, &cpu_data->max_thread); + + sum->user_count += cpu_data->user_count; + update_min(&sum->min_user, &cpu_data->min_user); + update_sum(&sum->sum_user, &cpu_data->sum_user); + update_max(&sum->max_user, &cpu_data->max_user); +} + /* * timerlat_hist_update - record a new timerlat occurent on cpu, updating data */ @@ -179,19 +214,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record, /* * timerlat_top_header - print the header of the tool output */ -static void timerlat_top_header(struct osnoise_tool *top) +static void timerlat_top_header(struct timerlat_top_params *params, struct osnoise_tool *top) { - struct timerlat_top_params *params = top->params; struct trace_seq *s = top->trace.seq; char duration[26]; get_duration(top->start_time, duration, sizeof(duration)); - trace_seq_printf(s, "\033[2;37;40m"); + if (params->pretty_output) + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); if (params->user_top) trace_seq_printf(s, " "); - trace_seq_printf(s, "\033[0;0;0m"); + + if (params->pretty_output) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, @@ -204,14 +242,20 @@ static void timerlat_top_header(struct osnoise_tool *top) } trace_seq_printf(s, "\n"); - trace_seq_printf(s, "\033[2;30;47m"); + if (params->pretty_output) + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); if (params->user_top) trace_seq_printf(s, " | cur min avg max"); - trace_seq_printf(s, "\033[0;0;0m"); + + if (params->pretty_output) + trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); } +static const char *no_value = " -"; + /* * timerlat_top_print - prints the output of a given CPU */ @@ -239,10 +283,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - |"); + trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); @@ -251,10 +292,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) } if (!cpu_data->thread_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " -\n"); + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); @@ -271,10 +309,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) trace_seq_printf(s, " |"); if (!cpu_data->user_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " -\n"); + trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); @@ -285,6 +320,77 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) } /* + * timerlat_top_print_sum - prints the summary output + */ +static void +timerlat_top_print_sum(struct osnoise_tool *top, struct timerlat_top_cpu *summary) +{ + const char *split = "----------------------------------------"; + struct timerlat_top_params *params = top->params; + unsigned long long count = summary->irq_count; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + int e = 0; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!summary->irq_count && !summary->thread_count) + return; + + while (count > 999999) { + e++; + count /= 10; + } + + trace_seq_printf(s, "%.*s|%.*s|%.*s", 15, split, 40, split, 39, split); + if (params->user_top) + trace_seq_printf(s, "-|%.*s", 39, split); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "ALL #%-6llu e%d |", count, e); + + if (!summary->irq_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (summary->sum_irq / summary->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", summary->max_irq / divisor); + } + + if (!summary->thread_count) { + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (summary->sum_thread / summary->thread_count) / divisor); + trace_seq_printf(s, "%9llu", summary->max_thread / divisor); + } + + if (!params->user_top) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!summary->user_count) { + trace_seq_printf(s, " %s %s %s |", no_value, no_value, no_value); + } else { + trace_seq_printf(s, " "); + trace_seq_printf(s, "%9llu ", summary->min_user / divisor); + trace_seq_printf(s, "%9llu ", + (summary->sum_user / summary->user_count) / divisor); + trace_seq_printf(s, "%9llu\n", summary->max_user / divisor); + } +} + +/* * clear_terminal - clears the output terminal */ static void clear_terminal(struct trace_seq *seq) @@ -300,6 +406,7 @@ static void timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) { struct trace_instance *trace = &top->trace; + struct timerlat_top_cpu summary; static int nr_cpus = -1; int i; @@ -312,14 +419,19 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to if (!params->quiet) clear_terminal(trace->seq); - timerlat_top_header(top); + timerlat_top_reset_sum(&summary); + + timerlat_top_header(params, top); for (i = 0; i < nr_cpus; i++) { if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) continue; timerlat_top_print(top, i); + timerlat_top_update_sum(top, i, &summary); } + timerlat_top_print_sum(top, &summary); + trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); } @@ -334,8 +446,8 @@ static void timerlat_top_usage(char *usage) static const char *const msg[] = { "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", - " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]", + " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -350,7 +462,7 @@ static void timerlat_top_usage(char *usage) " -d/--duration time[m|h|d]: duration of the session in seconds", " -D/--debug: print debug info", " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", - " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <command>: enable a trace event filter to the previous -e event", " --trigger <command>: enable a trace event trigger to the previous -e event", @@ -364,8 +476,11 @@ static void timerlat_top_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", - " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", + " -u/--user-threads: use rtla user-space threads instead of kernel-space timerlat threads", + " -k/--kernel-threads: use timerlat kernel-space threads instead of rtla user-space threads", " -U/--user-load: enable timerlat for user-defined user-space workload", + " --warm-up s: let the workload run for s seconds before collecting data", + " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", NULL, }; @@ -425,6 +540,7 @@ static struct timerlat_top_params {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, {"user-threads", no_argument, 0, 'u'}, + {"kernel-threads", no_argument, 0, 'k'}, {"user-load", no_argument, 0, 'U'}, {"trigger", required_argument, 0, '0'}, {"filter", required_argument, 0, '1'}, @@ -432,13 +548,15 @@ static struct timerlat_top_params {"no-aa", no_argument, 0, '3'}, {"dump-tasks", no_argument, 0, '4'}, {"aa-only", required_argument, 0, '5'}, + {"warm-up", required_argument, 0, '6'}, + {"trace-buffer-size", required_argument, 0, '7'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:uU0:1:2:345:", + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:knp:P:qs:t::T:uU0:1:2:345:6:7:", long_options, &option_index); /* detect the end of the options. */ @@ -523,6 +641,9 @@ static struct timerlat_top_params case 'i': params->stop_us = get_llong_from_str(optarg); break; + case 'k': + params->kernel_workload = true; + break; case 'n': params->output_divisor = 1; break; @@ -547,9 +668,13 @@ static struct timerlat_top_params params->stop_total_us = get_llong_from_str(optarg); break; case 't': - if (optarg) - /* skip = */ - params->trace_output = &optarg[1]; + if (optarg) { + if (optarg[0] == '=') + params->trace_output = &optarg[1]; + else + params->trace_output = &optarg[0]; + } else if (optind < argc && argv[optind][0] != '-') + params->trace_output = argv[optind]; else params->trace_output = "timerlat_trace.txt"; @@ -595,6 +720,12 @@ static struct timerlat_top_params case '4': params->dump_tasks = 1; break; + case '6': + params->warmup = get_llong_from_str(optarg); + break; + case '7': + params->buffer_size = get_llong_from_str(optarg); + break; default: timerlat_top_usage("Invalid option"); } @@ -614,6 +745,9 @@ static struct timerlat_top_params if (params->no_aa && params->aa_only) timerlat_top_usage("--no-aa and --aa-only are mutually exclusive!"); + if (params->kernel_workload && params->user_workload) + timerlat_top_usage("--kernel-threads and --user-threads are mutually exclusive!"); + return params; } @@ -692,6 +826,22 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * auto_house_keeping(¶ms->monitored_cpus); } + /* + * If the user did not specify a type of thread, try user-threads first. + * Fall back to kernel threads otherwise. + */ + if (!params->kernel_workload && !params->user_workload) { + retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); + if (retval) { + debug_msg("User-space interface detected, setting user-threads\n"); + params->user_workload = 1; + params->user_top = 1; + } else { + debug_msg("User-space interface not detected, setting kernel-threads\n"); + params->kernel_workload = 1; + } + } + if (params->user_top) { retval = osnoise_set_workload(top->context, 0); if (retval) { @@ -700,6 +850,9 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } + if (isatty(1) && !params->quiet) + params->pretty_output = 1; + return 0; out_err: @@ -830,6 +983,12 @@ int timerlat_top_main(int argc, char *argv[]) if (retval) goto out_top; } + + if (params->buffer_size > 0) { + retval = trace_set_buffer_size(&record->trace, params->buffer_size); + if (retval) + goto out_top; + } } if (!params->no_aa) { @@ -859,22 +1018,6 @@ int timerlat_top_main(int argc, char *argv[]) } } - /* - * Start the tracers here, after having set all instances. - * - * Let the trace instance start first for the case of hitting a stop - * tracing while enabling other instances. The trace instance is the - * one with most valuable information. - */ - if (params->trace_output) - trace_instance_start(&record->trace); - if (!params->no_aa && aa != top) - trace_instance_start(&aa->trace); - trace_instance_start(trace); - - top->start_time = time(NULL); - timerlat_top_set_signals(params); - if (params->user_workload) { /* rtla asked to stop */ params_u.should_run = 1; @@ -894,6 +1037,27 @@ int timerlat_top_main(int argc, char *argv[]) err_msg("Error creating timerlat user-space threads\n"); } + if (params->warmup > 0) { + debug_msg("Warming up for %d seconds\n", params->warmup); + sleep(params->warmup); + } + + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa && aa != top) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + while (!stop_tracing) { sleep(params->sleep_time); diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index e1ba6d9f4265..170a706248ab 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -540,3 +540,18 @@ int trace_is_off(struct trace_instance *tool, struct trace_instance *trace) return 0; } + +/* + * trace_set_buffer_size - set the per-cpu tracing buffer size. + */ +int trace_set_buffer_size(struct trace_instance *trace, int size) +{ + int retval; + + debug_msg("Setting trace buffer size to %d Kb\n", size); + retval = tracefs_instance_set_buffer_size(trace->inst, size, -1); + if (retval) + err_msg("Error setting trace buffer size\n"); + + return retval; +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index 2e9a89a25615..c7c92dc9a18a 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -48,3 +48,4 @@ int trace_events_enable(struct trace_instance *instance, int trace_event_add_filter(struct trace_events *event, char *filter); int trace_event_add_trigger(struct trace_events *event, char *trigger); int trace_is_off(struct trace_instance *tool, struct trace_instance *trace); +int trace_set_buffer_size(struct trace_instance *trace, int size); diff --git a/tools/workqueue/wq_monitor.py b/tools/workqueue/wq_monitor.py index a8856a9c45dc..9e964c5be40c 100644 --- a/tools/workqueue/wq_monitor.py +++ b/tools/workqueue/wq_monitor.py @@ -32,16 +32,13 @@ https://github.com/osandov/drgn. rescued The number of work items executed by the rescuer. """ -import sys import signal -import os import re import time import json import drgn -from drgn.helpers.linux.list import list_for_each_entry,list_empty -from drgn.helpers.linux.cpumask import for_each_possible_cpu +from drgn.helpers.linux.list import list_for_each_entry import argparse parser = argparse.ArgumentParser(description=desc, @@ -54,10 +51,6 @@ parser.add_argument('-j', '--json', action='store_true', help='Output in json') args = parser.parse_args() -def err(s): - print(s, file=sys.stderr, flush=True) - sys.exit(1) - workqueues = prog['workqueues'] WQ_UNBOUND = prog['WQ_UNBOUND'] |