diff options
Diffstat (limited to 'tools/testing/selftests')
280 files changed, 17274 insertions, 2914 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 1a21d6beebc6..3b2061d1c1a5 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -18,6 +18,7 @@ TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net/bonding TARGETS += drivers/net/team +TARGETS += dt TARGETS += efivarfs TARGETS += exec TARGETS += fchmodat2 @@ -85,6 +86,8 @@ TARGETS += syscall_user_dispatch TARGETS += sysctl TARGETS += tc-testing TARGETS += tdx +TARGETS += thermal/intel/power_floor +TARGETS += thermal/intel/workload_hint TARGETS += timens ifneq (1, $(quicktest)) TARGETS += timers diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h index de030dc23bd1..29143ef52101 100644 --- a/tools/testing/selftests/alsa/alsa-local.h +++ b/tools/testing/selftests/alsa/alsa-local.h @@ -24,4 +24,14 @@ int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int de void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2, const char **array, int array_size, const char *def); +struct card_cfg_data { + int card; + snd_config_t *config; + const char *filename; + const char *config_id; + struct card_cfg_data *next; +}; + +extern struct card_cfg_data *conf_cards; + #endif /* __ALSA_LOCAL_H */ diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index 2f1685a3eae1..00925eb8d9f4 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -19,14 +19,7 @@ #define SYSFS_ROOT "/sys" -struct card_data { - int card; - snd_config_t *config; - const char *filename; - struct card_data *next; -}; - -static struct card_data *conf_cards; +struct card_cfg_data *conf_cards; static const char *alsa_config = "ctl.hw {\n" @@ -97,9 +90,9 @@ snd_config_t *get_alsalib_config(void) return config; } -static struct card_data *conf_data_by_card(int card, bool msg) +static struct card_cfg_data 
*conf_data_by_card(int card, bool msg) { - struct card_data *conf; + struct card_cfg_data *conf; for (conf = conf_cards; conf; conf = conf->next) { if (conf->card == card) { @@ -229,55 +222,31 @@ static bool sysfs_match(const char *sysfs_root, snd_config_t *config) return iter > 0; } -static bool test_filename1(int card, const char *filename, const char *sysfs_card_root) +static void assign_card_config(int card, const char *sysfs_card_root) { - struct card_data *data, *data2; - snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node; - snd_config_iterator_t i, next; + struct card_cfg_data *data; + snd_config_t *sysfs_card_config; - config = conf_load_from_file(filename); - if (snd_config_search(config, "sysfs", &sysfs_config) || - snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND) - ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename); - if (snd_config_search(config, "card", &card_config) || - snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND) - ksft_exit_fail_msg("Missing global card block in filename %s\n", filename); - if (!sysfs_match(SYSFS_ROOT, sysfs_config)) - return false; - snd_config_for_each(i, next, card_config) { - node = snd_config_iterator_entry(i); - if (snd_config_search(node, "sysfs", &sysfs_card_config) || - snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND) - ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename); + for (data = conf_cards; data; data = data->next) { + snd_config_search(data->config, "sysfs", &sysfs_card_config); if (!sysfs_match(sysfs_card_root, sysfs_card_config)) continue; - data = malloc(sizeof(*data)); - if (!data) - ksft_exit_fail_msg("Out of memory\n"); - data2 = conf_data_by_card(card, false); - if (data2) - ksft_exit_fail_msg("Duplicate card '%s' <-> '%s'\n", filename, data2->filename); + data->card = card; - data->filename = filename; - data->config = node; - data->next = conf_cards; - conf_cards = data; - return true; 
+ break; } - return false; } -static bool test_filename(const char *filename) +static void assign_card_configs(void) { char fn[128]; int card; for (card = 0; card < 32; card++) { snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card); - if (access(fn, R_OK) == 0 && test_filename1(card, filename, fn)) - return true; + if (access(fn, R_OK) == 0) + assign_card_config(card, fn); } - return false; } static int filename_filter(const struct dirent *dirent) @@ -296,6 +265,41 @@ static int filename_filter(const struct dirent *dirent) return 0; } +static bool match_config(const char *filename) +{ + struct card_cfg_data *data; + snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node; + snd_config_iterator_t i, next; + + config = conf_load_from_file(filename); + if (snd_config_search(config, "sysfs", &sysfs_config) || + snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename); + if (snd_config_search(config, "card", &card_config) || + snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing global card block in filename %s\n", filename); + if (!sysfs_match(SYSFS_ROOT, sysfs_config)) + return false; + snd_config_for_each(i, next, card_config) { + node = snd_config_iterator_entry(i); + if (snd_config_search(node, "sysfs", &sysfs_card_config) || + snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND) + ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename); + + data = malloc(sizeof(*data)); + if (!data) + ksft_exit_fail_msg("Out of memory\n"); + data->filename = filename; + data->config = node; + data->card = -1; + if (snd_config_get_id(node, &data->config_id)) + ksft_exit_fail_msg("snd_config_get_id failed for card\n"); + data->next = conf_cards; + conf_cards = data; + } + return true; +} + void conf_load(void) { const char *fn = "conf.d"; @@ -311,17 +315,19 @@ void conf_load(void) if (filename 
== NULL) ksft_exit_fail_msg("Out of memory\n"); sprintf(filename, "%s/%s", fn, namelist[j]->d_name); - if (test_filename(filename)) + if (match_config(filename)) filename = NULL; free(filename); free(namelist[j]); } free(namelist); + + assign_card_configs(); } void conf_free(void) { - struct card_data *conf; + struct card_cfg_data *conf; while (conf_cards) { conf = conf_cards; @@ -332,7 +338,7 @@ void conf_free(void) snd_config_t *conf_by_card(int card) { - struct card_data *conf; + struct card_cfg_data *conf; conf = conf_data_by_card(card, true); if (conf) diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index c0a39818c5a4..de664dedb541 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -566,6 +566,7 @@ void *card_thread(void *data) int main(void) { struct card_data *card; + struct card_cfg_data *conf; struct pcm_data *pcm; snd_config_t *global_config, *cfg; int num_pcm_tests = 0, num_tests, num_std_pcm_tests; @@ -583,6 +584,10 @@ int main(void) find_pcms(); + for (conf = conf_cards; conf; conf = conf->next) + if (conf->card < 0) + num_missing++; + num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL); for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) { @@ -598,6 +603,11 @@ int main(void) ksft_set_plan(num_missing + num_pcm_tests); + for (conf = conf_cards; conf; conf = conf->next) + if (conf->card < 0) + ksft_test_result_fail("test.missing.%s.%s\n", + conf->filename, conf->config_id); + for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) { ksft_test_result(false, "test.missing.%d.%d.%d.%s\n", pcm->card, pcm->device, pcm->subdevice, diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh index 5f2171f0116d..4cde62f90468 100755 --- a/tools/testing/selftests/amd-pstate/gitsource.sh +++ b/tools/testing/selftests/amd-pstate/gitsource.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # 
SPDX-License-Identifier: GPL-2.0 # Testing and monitor the cpu desire performance, frequency, load, @@ -66,12 +66,15 @@ post_clear_gitsource() install_gitsource() { - if [ ! -d $git_name ]; then + if [ ! -d $SCRIPTDIR/$git_name ]; then + pushd $(pwd) > /dev/null 2>&1 + cd $SCRIPTDIR printf "Download gitsource, please wait a moment ...\n\n" wget -O $git_tar $gitsource_url > /dev/null 2>&1 printf "Tar gitsource ...\n\n" tar -xzf $git_tar + popd > /dev/null 2>&1 fi } @@ -79,12 +82,14 @@ install_gitsource() run_gitsource() { echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" - ./amd_pstate_trace.py -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + $TRACER -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n" - cd $git_name - perf stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o ../$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > ../$OUTFILE_GIT-perf-$1-$2.log 2>&1 - cd .. + BACKUP_DIR=$(pwd) + pushd $BACKUP_DIR > /dev/null 2>&1 + cd $SCRIPTDIR/$git_name + $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o $BACKUP_DIR/$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > $BACKUP_DIR/$OUTFILE_GIT-perf-$1-$2.log 2>&1 + popd > /dev/null 2>&1 for job in `jobs -p` do diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh index de4d8e9c9565..b053eea8bb19 100755 --- a/tools/testing/selftests/amd-pstate/run.sh +++ b/tools/testing/selftests/amd-pstate/run.sh @@ -8,9 +8,12 @@ else FILE_MAIN=DONE fi -source basic.sh -source tbench.sh -source gitsource.sh +SCRIPTDIR=`dirname "$0"` +TRACER=$SCRIPTDIR/../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py + +source $SCRIPTDIR/basic.sh +source $SCRIPTDIR/tbench.sh +source $SCRIPTDIR/gitsource.sh # amd-pstate-ut only run on x86/x86_64 AMD systems. 
ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/') @@ -22,6 +25,7 @@ OUTFILE=selftest OUTFILE_TBENCH="$OUTFILE.tbench" OUTFILE_GIT="$OUTFILE.gitsource" +PERF=/usr/bin/perf SYSFS= CPUROOT= CPUFREQROOT= @@ -151,6 +155,7 @@ help() [-p <tbench process number>] [-l <loop times for tbench>] [-i <amd tracer interval>] + [-b <perf binary>] [-m <comparative test: acpi-cpufreq>] \n" exit 2 @@ -158,7 +163,7 @@ help() parse_arguments() { - while getopts ho:c:t:p:l:i:m: arg + while getopts ho:c:t:p:l:i:b:m: arg do case $arg in h) # --help @@ -189,6 +194,10 @@ parse_arguments() TRACER_INTERVAL=$OPTARG ;; + b) # --perf-binary + PERF=`realpath $OPTARG` + ;; + m) # --comparative-test COMPARATIVE_TEST=$OPTARG ;; @@ -202,8 +211,8 @@ parse_arguments() command_perf() { - if ! command -v perf > /dev/null; then - echo $msg please install perf. >&2 + if ! $PERF -v; then + echo $msg please install perf or provide perf binary path as argument >&2 exit $ksft_skip fi } diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh index 49c9850341f6..2a98d9c9202e 100755 --- a/tools/testing/selftests/amd-pstate/tbench.sh +++ b/tools/testing/selftests/amd-pstate/tbench.sh @@ -64,11 +64,11 @@ post_clear_tbench() run_tbench() { echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL" - ./amd_pstate_trace.py -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & + $TRACER -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 & printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n" tbench_srv > /dev/null 2>&1 & - perf stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1 + $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1 pid=`pidof tbench_srv` kill $pid diff --git a/tools/testing/selftests/arm64/abi/hwcap.c 
b/tools/testing/selftests/arm64/abi/hwcap.c index e3d262831d91..1189e77c8152 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -81,6 +81,20 @@ static void lrcpc_sigill(void) asm volatile(".inst 0xb8bfc3e0" : : : ); } +static void lse128_sigill(void) +{ + u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 }; + register u64 *memp asm ("x0") = mem; + register u64 val0 asm ("x1") = 5; + register u64 val1 asm ("x2") = 4; + + /* SWPP X1, X2, [X0] */ + asm volatile(".inst 0x19228001" + : "+r" (memp), "+r" (val0), "+r" (val1) + : + : "cc", "memory"); +} + static void mops_sigill(void) { char dst[1], src[1]; @@ -226,6 +240,12 @@ static void sveaes_sigill(void) asm volatile(".inst 0x4522e400" : : : "z0"); } +static void sveb16b16_sigill(void) +{ + /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ + asm volatile(".inst 0xC1E41C00" : : : ); +} + static void svepmull_sigill(void) { /* PMULLB Z0.Q, Z0.D, Z0.D */ @@ -289,6 +309,19 @@ static void uscat_sigbus(void) asm volatile(".inst 0xb820003f" : : : ); } +static void lrcpc3_sigill(void) +{ + int data[2] = { 1, 2 }; + + register int *src asm ("x0") = data; + register int data0 asm ("w2") = 0; + register int data1 asm ("w3") = 0; + + /* LDIAPP w2, w3, [x0] */ + asm volatile(".inst 0x99431802" + : "=r" (data0), "=r" (data1) : "r" (src) :); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; @@ -349,6 +382,13 @@ static const struct hwcap_data { .sigill_fn = ilrcpc_sigill, }, { + .name = "LRCPC3", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_LRCPC3, + .cpuinfo = "lrcpc3", + .sigill_fn = lrcpc3_sigill, + }, + { .name = "LSE", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_ATOMICS, @@ -365,6 +405,13 @@ static const struct hwcap_data { .sigbus_reliable = true, }, { + .name = "LSE128", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_LSE128, + .cpuinfo = "lse128", + .sigill_fn = lse128_sigill, + }, + { .name = "MOPS", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_MOPS, @@ 
-494,6 +541,13 @@ static const struct hwcap_data { .sigill_fn = sveaes_sigill, }, { + .name = "SVE2 B16B16", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_SVE_B16B16, + .cpuinfo = "sveb16b16", + .sigill_fn = sveb16b16_sigill, + }, + { .name = "SVE2 PMULL", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SVEPMULL, diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 4328895dfc87..547d077e3517 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -473,6 +473,13 @@ function _start // mov x8, #__NR_sched_yield // Encourage preemption // svc #0 +#ifdef SSVE + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1 + and x1, x0, #3 + cmp x1, #1 + b.ne svcr_barf +#endif + mov x21, #0 0: mov x0, x21 bl check_zreg @@ -553,3 +560,15 @@ function vl_barf mov x1, #1 svc #0 endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4225f975fce3..9c27b67bc7b1 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -585,11 +585,20 @@ endef # Define test_progs test runner. 
TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs -TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ - network_helpers.c testing_helpers.c \ - btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c disasm.c \ - json_writer.c unpriv_helpers.c \ +TRUNNER_EXTRA_SOURCES := test_progs.c \ + cgroup_helpers.c \ + trace_helpers.c \ + network_helpers.c \ + testing_helpers.c \ + btf_helpers.c \ + cap_helpers.c \ + unpriv_helpers.c \ + netlink_helpers.c \ + test_loader.c \ + xsk.c \ + disasm.c \ + json_writer.c \ + flow_dissector_load.h \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 2c8cb3f61529..1386baf9ae4a 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -458,4 +458,23 @@ extern void bpf_throw(u64 cookie) __ksym; __bpf_assert_op(LHS, <=, END, value, false); \ }) +struct bpf_iter_css_task; +struct cgroup_subsys_state; +extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it, + struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym; +extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym; +extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym; + +struct bpf_iter_task; +extern int bpf_iter_task_new(struct bpf_iter_task *it, + struct task_struct *task, unsigned int flags) __weak __ksym; +extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym; +extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym; + +struct bpf_iter_css; +extern int bpf_iter_css_new(struct bpf_iter_css *it, + struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym; +extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak 
__ksym; +extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index a5e246f7b202..91907b321f91 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 { int b; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in bpf_testmod.ko BTF"); +__bpf_hook_start(); noinline int bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) { @@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a) } EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); -__diag_pop(); +__bpf_hook_end(); static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .attr = { .name = "bpf_testmod", .mode = 0666, }, diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5b1da2a32ea7..5aa133bf3688 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute 
cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -474,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -523,10 +531,20 @@ int setup_classid_environment(void) return 1; } - if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && - errno != EBUSY) { - log_err("mount cgroup net_cls"); - return 1; + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) { + if (errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + if (rmdir(NETCLS_MOUNT_PATH)) { + log_err("rmdir cgroup net_cls"); + return 1; + } + if (umount(CGROUP_MOUNT_DFLT)) { + log_err("umount cgroup base"); + return 1; + } } cleanup_classid_environment(); @@ -541,15 +559,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. 
If there * is a failure, it prints the error to stderr. */ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -565,7 +584,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } @@ -607,3 +626,66 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like + * "net_cls,net_prio". + */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c3, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? 
id : -1; +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b546..ee053641c026 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); @@ -29,8 +30,9 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); int setup_classid_environment(void); void cleanup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 02dd4409200e..3ec5927ec3e5 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -71,6 +71,7 @@ CONFIG_NETFILTER_SYNPROXY=y CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NETKIT=y CONFIG_NF_CONNTRACK=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 253821494884..29c8635c5722 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -37,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -46,7 +46,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y 
CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +66,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +97,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +136,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +162,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 2ba92167be35..e93330382849 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +132,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y 
-CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 000000000000..a9746ca78777 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..f7bfb2b09c82 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y @@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -228,12 +222,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 1a9eeefda9a8..2ea36408816b 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ 
b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -148,19 +155,38 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -281,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. 
Check values, then delete all elements and check @@ -326,20 +359,14 @@ static int map_create(__u32 type, const char *name, struct bpf_map_create_opts * static int create_hash(void) { - struct bpf_map_create_opts map_opts = { - .sz = sizeof(map_opts), - .map_flags = BPF_F_NO_PREALLOC, - }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC); return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts); } static int create_percpu_hash(void) { - struct bpf_map_create_opts map_opts = { - .sz = sizeof(map_opts), - .map_flags = BPF_F_NO_PREALLOC, - }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC); return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts); } @@ -356,21 +383,17 @@ static int create_percpu_hash_prealloc(void) static int create_lru_hash(__u32 type, __u32 map_flags) { - struct bpf_map_create_opts map_opts = { - .sz = sizeof(map_opts), - .map_flags = map_flags, - }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags); return map_create(type, "lru_hash", &map_opts); } static int create_hash_of_maps(void) { - struct bpf_map_create_opts map_opts = { - .sz = sizeof(map_opts), + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC, .inner_map_fd = create_small_hash(), - }; + ); int ret; ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps", diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c new file mode 100644 index 000000000000..caf36eb1d032 --- /dev/null +++ b/tools/testing/selftests/bpf/netlink_helpers.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Taken & modified from iproute2's libnetlink.c + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <time.h> +#include <sys/socket.h> + +#include "netlink_helpers.h" + +static int rcvbuf = 1024 * 1024; + +void rtnl_close(struct 
rtnl_handle *rth) +{ + if (rth->fd >= 0) { + close(rth->fd); + rth->fd = -1; + } +} + +int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, + int protocol) +{ + socklen_t addr_len; + int sndbuf = 32768; + int one = 1; + + memset(rth, 0, sizeof(*rth)); + rth->proto = protocol; + rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); + if (rth->fd < 0) { + perror("Cannot open netlink socket"); + return -1; + } + if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF, + &sndbuf, sizeof(sndbuf)) < 0) { + perror("SO_SNDBUF"); + goto err; + } + if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF, + &rcvbuf, sizeof(rcvbuf)) < 0) { + perror("SO_RCVBUF"); + goto err; + } + + /* Older kernels may no support extended ACK reporting */ + setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + memset(&rth->local, 0, sizeof(rth->local)); + rth->local.nl_family = AF_NETLINK; + rth->local.nl_groups = subscriptions; + + if (bind(rth->fd, (struct sockaddr *)&rth->local, + sizeof(rth->local)) < 0) { + perror("Cannot bind netlink socket"); + goto err; + } + addr_len = sizeof(rth->local); + if (getsockname(rth->fd, (struct sockaddr *)&rth->local, + &addr_len) < 0) { + perror("Cannot getsockname"); + goto err; + } + if (addr_len != sizeof(rth->local)) { + fprintf(stderr, "Wrong address length %d\n", addr_len); + goto err; + } + if (rth->local.nl_family != AF_NETLINK) { + fprintf(stderr, "Wrong address family %d\n", + rth->local.nl_family); + goto err; + } + rth->seq = time(NULL); + return 0; +err: + rtnl_close(rth); + return -1; +} + +int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) +{ + return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE); +} + +static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags) +{ + int len; + + do { + len = recvmsg(fd, msg, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + if (len < 0) { + fprintf(stderr, "netlink receive error %s (%d)\n", + strerror(errno), errno); + 
return -errno; + } + if (len == 0) { + fprintf(stderr, "EOF on netlink\n"); + return -ENODATA; + } + return len; +} + +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer) +{ + struct iovec *iov = msg->msg_iov; + char *buf; + int len; + + iov->iov_base = NULL; + iov->iov_len = 0; + + len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC); + if (len < 0) + return len; + if (len < 32768) + len = 32768; + buf = malloc(len); + if (!buf) { + fprintf(stderr, "malloc error: not enough buffer\n"); + return -ENOMEM; + } + iov->iov_base = buf; + iov->iov_len = len; + len = __rtnl_recvmsg(fd, msg, 0); + if (len < 0) { + free(buf); + return len; + } + if (answer) + *answer = buf; + else + free(buf); + return len; +} + +static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err, + nl_ext_ack_fn_t errfn) +{ + fprintf(stderr, "RTNETLINK answers: %s\n", + strerror(-err->error)); +} + +static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov, + size_t iovlen, struct nlmsghdr **answer, + bool show_rtnl_err, nl_ext_ack_fn_t errfn) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec riov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + unsigned int seq = 0; + struct nlmsghdr *h; + int i, status; + char *buf; + + for (i = 0; i < iovlen; i++) { + h = iov[i].iov_base; + h->nlmsg_seq = seq = ++rtnl->seq; + if (answer == NULL) + h->nlmsg_flags |= NLM_F_ACK; + } + status = sendmsg(rtnl->fd, &msg, 0); + if (status < 0) { + perror("Cannot talk to rtnetlink"); + return -1; + } + /* change msg to use the response iov */ + msg.msg_iov = &riov; + msg.msg_iovlen = 1; + i = 0; + while (1) { +next: + status = rtnl_recvmsg(rtnl->fd, &msg, &buf); + ++i; + if (status < 0) + return status; + if (msg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "Sender address length == %d!\n", + msg.msg_namelen); + exit(1); + } + for (h = (struct nlmsghdr *)buf; status >= 
sizeof(*h); ) { + int len = h->nlmsg_len; + int l = len - sizeof(*h); + + if (l < 0 || len > status) { + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Truncated message!\n"); + free(buf); + return -1; + } + fprintf(stderr, + "Malformed message: len=%d!\n", + len); + exit(1); + } + if (nladdr.nl_pid != 0 || + h->nlmsg_pid != rtnl->local.nl_pid || + h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) { + /* Don't forget to skip that message. */ + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + continue; + } + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + int error = err->error; + + if (l < sizeof(struct nlmsgerr)) { + fprintf(stderr, "ERROR truncated\n"); + free(buf); + return -1; + } + if (error) { + errno = -error; + if (rtnl->proto != NETLINK_SOCK_DIAG && + show_rtnl_err) + rtnl_talk_error(h, err, errfn); + } + if (i < iovlen) { + free(buf); + goto next; + } + if (error) { + free(buf); + return -i; + } + if (answer) + *answer = (struct nlmsghdr *)buf; + else + free(buf); + return 0; + } + if (answer) { + *answer = (struct nlmsghdr *)buf; + return 0; + } + fprintf(stderr, "Unexpected reply!\n"); + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + } + free(buf); + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Message truncated!\n"); + continue; + } + if (status) { + fprintf(stderr, "Remnant of size %d!\n", status); + exit(1); + } + } +} + +static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer, bool show_rtnl_err, + nl_ext_ack_fn_t errfn) +{ + struct iovec iov = { + .iov_base = n, + .iov_len = n->nlmsg_len, + }; + + return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn); +} + +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) +{ + return __rtnl_talk(rtnl, n, answer, true, NULL); +} + +int addattr(struct nlmsghdr *n, int maxlen, int type) +{ 
+ return addattr_l(n, maxlen, type, NULL, 0); +} + +int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u8)); +} + +int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u16)); +} + +int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u32)); +} + +int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u64)); +} + +int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str) +{ + return addattr_l(n, maxlen, type, str, strlen(str)+1); +} + +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, + int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { + fprintf(stderr, "%s: Message exceeded bound of %d\n", + __func__, maxlen); + return -1; + } + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + if (alen) + memcpy(RTA_DATA(rta), data, alen); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return 0; +} + +int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len) +{ + if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) { + fprintf(stderr, "%s: Message exceeded bound of %d\n", + __func__, maxlen); + return -1; + } + + memcpy(NLMSG_TAIL(n), data, len); + memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len); + return 0; +} + +struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type) +{ + struct rtattr *nest = NLMSG_TAIL(n); + + addattr_l(n, maxlen, type, NULL, 0); + return nest; +} + +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) +{ + nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest; + return n->nlmsg_len; +} diff --git 
a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h new file mode 100644 index 000000000000..68116818a47e --- /dev/null +++ b/tools/testing/selftests/bpf/netlink_helpers.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef NETLINK_HELPERS_H +#define NETLINK_HELPERS_H + +#include <string.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> + +struct rtnl_handle { + int fd; + struct sockaddr_nl local; + struct sockaddr_nl peer; + __u32 seq; + __u32 dump; + int proto; + FILE *dump_fp; +#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01 +#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02 +#define RTNL_HANDLE_F_STRICT_CHK 0x04 + int flags; +}; + +#define NLMSG_TAIL(nmsg) \ + ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) + +typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off, + const struct nlmsghdr *inner_nlh); + +int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) + __attribute__((warn_unused_result)); +void rtnl_close(struct rtnl_handle *rth); +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) + __attribute__((warn_unused_result)); + +int addattr(struct nlmsghdr *n, int maxlen, int type); +int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data); +int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data); +int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data); +int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data); +int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data); +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen); +int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len); +struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type); +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest); +#endif /* NETLINK_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/align.c 
b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d3c..4ebd0da898f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, 
"R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", "var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. @@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
*/ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298bb7..f7cd129cb82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create 
netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 41aba139b20b..618af9dfae9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,7 +7,7 @@ #include "bpf_iter_ipv6_route.skel.h" #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" -#include "bpf_iter_task.skel.h" +#include "bpf_iter_tasks.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" #include "bpf_iter_task_vmas.skel.h" @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" #include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -215,12 +213,12 @@ static void *do_nothing_wait(void *arg) static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, int *num_unknown, int *num_known) { - struct bpf_iter_task *skel; + struct bpf_iter_tasks *skel; pthread_t thread_id; void *ret; - skel = bpf_iter_task__open_and_load(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) + skel = bpf_iter_tasks__open_and_load(); + if 
(!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load")) return; ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock"); @@ -239,7 +237,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL, "pthread_join"); - bpf_iter_task__destroy(skel); + bpf_iter_tasks__destroy(skel); } static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known) @@ -307,10 +305,10 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { - struct bpf_iter_task *skel; + struct bpf_iter_tasks *skel; - skel = bpf_iter_task__open_and_load(); - if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load")) + skel = bpf_iter_tasks__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load")) return; do_dummy_read(skel->progs.dump_task_sleepable); @@ -320,7 +318,7 @@ static void test_task_sleepable(void) ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0, "num_success_copy_from_user_task"); - bpf_iter_task__destroy(skel); + bpf_iter_tasks__destroy(skel); } static void test_task_stack(void) @@ -334,6 +332,8 @@ static void test_task_stack(void) do_dummy_read(skel->progs.dump_task_stack); do_dummy_read(skel->progs.get_task_user_stacks); + ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks"); + bpf_iter_task_stack__destroy(skel); } @@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 
1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -600,7 +599,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. */ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -705,7 +700,7 
@@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -836,7 +828,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -983,17 +977,14 @@ static void test_bpf_array_map(void) start = 
0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) @@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't 
deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. + */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b15280..f09d6ac2ef09 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; 
struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. 
*/ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + 
!ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - 
!!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, 
prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - 
err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), 
"info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525d4..a88e6e07e4f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = 
setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char 
*tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, 
expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897d8..e770912fc1d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 92d51f377fe5..8fb4a04fbbc0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) #endif assert(0); + return 0; } static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 000000000000..74d6d7546f40 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <sys/types.h> +#include <unistd.h> +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when 
attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + 
goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c index e02feb5fae97..574d9a0cdc8e 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include <bpf/libbpf.h> #include <bpf/btf.h> +#include "iters_css_task.skel.h" #include "cgroup_iter.skel.h" #include "cgroup_helpers.h" @@ -263,6 +264,35 @@ close_cgrp: close(cgrp_fd); } +static void test_walk_self_only_css_task(void) +{ + struct iters_css_task *skel; + int err; + + skel = iters_css_task__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.cgroup_id_printer, true); + + err = iters_css_task__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = join_cgroup(cg_path[CHILD2]); + if (!ASSERT_OK(err, "join_cgroup")) + goto cleanup; + + skel->bss->target_pid = getpid(); + snprintf(expected_output, sizeof(expected_output), + PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]); + read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2], + BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task"); + ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt"); +cleanup: + iters_css_task__destroy(skel); +} + void test_cgroup_iter(void) { struct cgroup_iter *skel = NULL; @@ -293,6 +323,9 @@ void test_cgroup_iter(void) test_walk_self_only(skel); if (test__start_subtest("cgroup_iter__dead_self_only")) test_walk_dead_self_only(skel); + if (test__start_subtest("cgroup_iter__self_only_css_task")) + test_walk_self_only_css_task(); + out: cgroup_iter__destroy(skel); cleanup_cgroups(); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914d3..addf720428f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); 
ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index b696873c5455..bf84d4a1d9ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -1,7 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <sys/syscall.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <unistd.h> +#include <malloc.h> +#include <stdlib.h> #include <test_progs.h> +#include "cgroup_helpers.h" #include "iters.skel.h" #include "iters_state_safety.skel.h" @@ -9,6 +16,10 @@ #include "iters_num.skel.h" #include "iters_testmod_seq.skel.h" #include "iters_task_vma.skel.h" +#include "iters_task.skel.h" +#include "iters_css_task.skel.h" +#include "iters_css.skel.h" +#include "iters_task_failure.skel.h" static void subtest_num_iters(void) { @@ -146,11 +157,144 @@ cleanup: iters_task_vma__destroy(skel); } +static pthread_mutex_t do_nothing_mutex; + +static void *do_nothing_wait(void *arg) +{ + pthread_mutex_lock(&do_nothing_mutex); + pthread_mutex_unlock(&do_nothing_mutex); + + pthread_exit(arg); +} + +#define thread_num 2 + +static void subtest_task_iters(void) +{ + struct iters_task *skel = NULL; + pthread_t thread_ids[thread_num]; + void *ret; + int err; + + skel = iters_task__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto cleanup; + skel->bss->target_pid = getpid(); + err = iters_task__attach(skel); + if (!ASSERT_OK(err, "iters_task__attach")) + goto cleanup; + pthread_mutex_lock(&do_nothing_mutex); + for (int i = 0; i < thread_num; i++) + ASSERT_OK(pthread_create(&thread_ids[i], NULL, &do_nothing_wait, NULL), + "pthread_create"); + + syscall(SYS_getpgid); + iters_task__detach(skel); + ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); + ASSERT_EQ(skel->bss->threads_cnt, thread_num 
+ 1, "threads_cnt"); + ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + pthread_mutex_unlock(&do_nothing_mutex); + for (int i = 0; i < thread_num; i++) + ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); +cleanup: + iters_task__destroy(skel); +} + +extern int stack_mprotect(void); + +static void subtest_css_task_iters(void) +{ + struct iters_css_task *skel = NULL; + int err, cg_fd, cg_id; + const char *cgrp_path = "/cg1"; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + goto cleanup; + cg_fd = create_and_get_cgroup(cgrp_path); + if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup")) + goto cleanup; + cg_id = get_cgroup_id(cgrp_path); + err = join_cgroup(cgrp_path); + if (!ASSERT_OK(err, "join_cgroup")) + goto cleanup; + + skel = iters_css_task__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto cleanup; + + skel->bss->target_pid = getpid(); + skel->bss->cg_id = cg_id; + err = iters_css_task__attach(skel); + if (!ASSERT_OK(err, "iters_task__attach")) + goto cleanup; + err = stack_mprotect(); + if (!ASSERT_EQ(err, -1, "stack_mprotect") || + !ASSERT_EQ(errno, EPERM, "stack_mprotect")) + goto cleanup; + iters_css_task__detach(skel); + ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt"); + +cleanup: + cleanup_cgroup_environment(); + iters_css_task__destroy(skel); +} + +static void subtest_css_iters(void) +{ + struct iters_css *skel = NULL; + struct { + const char *path; + int fd; + } cgs[] = { + { "/cg1" }, + { "/cg1/cg2" }, + { "/cg1/cg2/cg3" }, + { "/cg1/cg2/cg3/cg4" }, + }; + int err, cg_nr = ARRAY_SIZE(cgs); + int i; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + goto cleanup; + for (i = 0; i < cg_nr; i++) { + cgs[i].fd = create_and_get_cgroup(cgs[i].path); + if (!ASSERT_GE(cgs[i].fd, 0, "create_and_get_cgroup")) + goto cleanup; + } + + skel = iters_css__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto 
cleanup; + + skel->bss->target_pid = getpid(); + skel->bss->root_cg_id = get_cgroup_id(cgs[0].path); + skel->bss->leaf_cg_id = get_cgroup_id(cgs[cg_nr - 1].path); + err = iters_css__attach(skel); + + if (!ASSERT_OK(err, "iters_task__attach")) + goto cleanup; + + syscall(SYS_getpgid); + ASSERT_EQ(skel->bss->pre_order_cnt, cg_nr, "pre_order_cnt"); + ASSERT_EQ(skel->bss->first_cg_id, get_cgroup_id(cgs[0].path), "first_cg_id"); + + ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt"); + ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id"); + ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high"); + iters_css__detach(skel); +cleanup: + cleanup_cgroup_environment(); + iters_css__destroy(skel); +} + void test_iters(void) { RUN_TESTS(iters_state_safety); RUN_TESTS(iters_looping); RUN_TESTS(iters); + RUN_TESTS(iters_css_task); if (env.has_testmod) RUN_TESTS(iters_testmod_seq); @@ -161,4 +305,11 @@ void test_iters(void) subtest_testmod_seq_iters(); if (test__start_subtest("task_vma")) subtest_task_vma_iters(); + if (test__start_subtest("task")) + subtest_task_iters(); + if (test__start_subtest("css_task")) + subtest_css_task_iters(); + if (test__start_subtest("css")) + subtest_css_iters(); + RUN_TESTS(iters_task_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 69dc31383b78..2fb89de63bd2 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -94,14 +94,8 @@ static struct { { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, { "incorrect_head_off1", "bpf_list_head not found at offset=25" }, { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, - { "pop_front_off", - "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) " - "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n" - "16: (85) call 
bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, - { "pop_back_off", - "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) " - "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n" - "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, + { "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" }, + { "pop_back_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" }, }; static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg) diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba..e6e50a394472 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + 
local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65ef4..0f7ea4d7d9f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 000000000000..0c9abd279e18 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,2124 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. 
and affiliates. */ + +#define _GNU_SOURCE +#include <limits.h> +#include <test_progs.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? 
(s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: printf("t_unsigned!\n"); exit(1); + } +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void 
snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, 
x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b = x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. 
Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. + */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: + return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in 
*to_t* domain preserving + * all possible information. Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which 
transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a register with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does 
register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a register with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a register with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. 
+ */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + break; + + /* below extended logic is not supported by verifier just yet */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct 
range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("<not found>%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum 
num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s, br; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive 
only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE && br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. 
+ */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? 
BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? 
BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = 
bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assummed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assummed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. 
+ */ + struct { + const char *pfx; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", &reg->r[S64].a, S64_MIN}, + {"smax=", &reg->r[S64].b, S64_MAX}, + {"umin=", &reg->r[U64].a, 0}, + {"umax=", &reg->r[U64].b, U64_MAX}, + {"smin32=", &reg->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", &reg->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", &reg->r[U32].a, 0, true}, + {"umax32=", &reg->r[U32].b, U32_MAX, true}, + }; + const char *p; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. 
+ */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? "w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "<valid>" : "<invalid>", + e->valid ? 
"<valid>" : "<invalid>"); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? 
unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 <op> r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + 
S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; + __u64 start_ns; + char progress_ctx[64]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? 
op_str(t->op) : "<op>"); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); 
print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? -EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, true /* print op */); + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 
3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? 
-1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, 
sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, 
"ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + const char *s; + int err; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? 
ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? 
ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* (<range> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this takes a long time, guard this behind SLOW_TESTS=1 envvar. 
If + * envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditional numeric domains to get an easy parallelization with test_progs' + * -j argument. + */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { 
validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + +#define DEFAULT_RAND_CASE_CNT 100 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use roughly equal split: 22 + 21 + 21 bits + */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, 
rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* <range1> x <range2> */ + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) + goto cleanup; + /* <range2> x <range1> */ + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) + goto cleanup; + } + +cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] 
RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void 
test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, 
{0, 1}, {1, 0x80000000}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, + {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. 
+ */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 9e6a5e3ed4de..5a4491d4edfe 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <io_uring/mini_liburing.h> #include "cgroup_helpers.h" static char bpf_log_buf[4096]; @@ -38,6 +39,7 @@ static struct sockopt_test { socklen_t get_optlen_ret; enum sockopt_test_error error; + bool io_uring_support; } tests[] = { /* ==================== getsockopt ==================== */ @@ -251,7 +253,9 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .get_level = SOL_SOCKET, .get_optlen = 64, + .io_uring_support = true, }, { .descr = "getsockopt: deny bigger ctx->optlen", @@ -276,6 +280,7 @@ static struct sockopt_test { .get_optlen = 64, .error = EFAULT_GETSOCKOPT, + .io_uring_support = true, }, { .descr = "getsockopt: ignore >PAGE_SIZE optlen", @@ -318,6 +323,7 @@ static struct sockopt_test { .get_optval = {}, /* the changes are ignored */ .get_optlen = PAGE_SIZE + 1, .error = EOPNOTSUPP_GETSOCKOPT, + .io_uring_support = true, }, { .descr = "getsockopt: support smaller ctx->optlen", @@ -337,8 +343,10 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .get_level = SOL_SOCKET, .get_optlen = 64, .get_optlen_ret = 32, + .io_uring_support = true, }, { .descr = "getsockopt: deny writing to ctx->optval", @@ -518,6 +526,7 @@ static struct sockopt_test { 
.set_level = 123, .set_optlen = 1, + .io_uring_support = true, }, { .descr = "setsockopt: allow changing ctx->level", @@ -572,6 +581,7 @@ static struct sockopt_test { .set_optname = 123, .set_optlen = 1, + .io_uring_support = true, }, { .descr = "setsockopt: allow changing ctx->optname", @@ -624,6 +634,7 @@ static struct sockopt_test { .expected_attach_type = BPF_CGROUP_SETSOCKOPT, .set_optlen = 64, + .io_uring_support = true, }, { .descr = "setsockopt: ctx->optlen == -1 is ok", @@ -640,6 +651,7 @@ static struct sockopt_test { .expected_attach_type = BPF_CGROUP_SETSOCKOPT, .set_optlen = 64, + .io_uring_support = true, }, { .descr = "setsockopt: deny ctx->optlen < 0 (except -1)", @@ -658,6 +670,7 @@ static struct sockopt_test { .set_optlen = 4, .error = EFAULT_SETSOCKOPT, + .io_uring_support = true, }, { .descr = "setsockopt: deny ctx->optlen > input optlen", @@ -675,6 +688,7 @@ static struct sockopt_test { .set_optlen = 64, .error = EFAULT_SETSOCKOPT, + .io_uring_support = true, }, { .descr = "setsockopt: ignore >PAGE_SIZE optlen", @@ -940,7 +954,89 @@ static int load_prog(const struct bpf_insn *insns, return fd; } -static int run_test(int cgroup_fd, struct sockopt_test *test) +/* Core function that handles io_uring ring initialization, + * sending SQE with sockopt command and waiting for the CQE. 
+ */ +static int uring_sockopt(int op, int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + struct io_uring_cqe *cqe; + struct io_uring_sqe *sqe; + struct io_uring ring; + int err; + + err = io_uring_queue_init(1, &ring, 0); + if (!ASSERT_OK(err, "io_uring initialization")) + return err; + + sqe = io_uring_get_sqe(&ring); + if (!ASSERT_NEQ(sqe, NULL, "Get an SQE")) { + err = -1; + goto fail; + } + + io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen); + + err = io_uring_submit(&ring); + if (!ASSERT_EQ(err, 1, "Submit SQE")) + goto fail; + + err = io_uring_wait_cqe(&ring, &cqe); + if (!ASSERT_OK(err, "Wait for CQE")) + goto fail; + + err = cqe->res; + +fail: + io_uring_queue_exit(&ring); + + return err; +} + +static int uring_setsockopt(int fd, int level, int optname, const void *optval, + socklen_t optlen) +{ + return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname, + optval, optlen); +} + +static int uring_getsockopt(int fd, int level, int optname, void *optval, + socklen_t *optlen) +{ + int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname, + optval, *optlen); + if (ret < 0) + return ret; + + /* Populate optlen back to be compatible with system call interface, + * and simplify the test. 
+ */ + *optlen = ret; + + return 0; +} + +/* Execute the setsockopt operation */ +static int call_setsockopt(bool use_io_uring, int fd, int level, int optname, + const void *optval, socklen_t optlen) +{ + if (use_io_uring) + return uring_setsockopt(fd, level, optname, optval, optlen); + + return setsockopt(fd, level, optname, optval, optlen); +} + +/* Execute the getsockopt operation */ +static int call_getsockopt(bool use_io_uring, int fd, int level, int optname, + void *optval, socklen_t *optlen) +{ + if (use_io_uring) + return uring_getsockopt(fd, level, optname, optval, optlen); + + return getsockopt(fd, level, optname, optval, optlen); +} + +static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) { int sock_fd, err, prog_fd; void *optval = NULL; @@ -980,8 +1076,9 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; } - err = setsockopt(sock_fd, test->set_level, test->set_optname, - test->set_optval, test->set_optlen); + err = call_setsockopt(use_io_uring, sock_fd, test->set_level, + test->set_optname, test->set_optval, + test->set_optlen); if (err) { if (errno == EPERM && test->error == EPERM_SETSOCKOPT) goto close_sock_fd; @@ -1008,8 +1105,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) socklen_t expected_get_optlen = test->get_optlen_ret ?: test->get_optlen; - err = getsockopt(sock_fd, test->get_level, test->get_optname, - optval, &optlen); + err = call_getsockopt(use_io_uring, sock_fd, test->get_level, + test->get_optname, optval, &optlen); if (err) { if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT) goto free_optval; @@ -1063,7 +1160,11 @@ void test_sockopt(void) if (!test__start_subtest(tests[i].descr)) continue; - ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + ASSERT_OK(run_test(cgroup_fd, &tests[i], false), + tests[i].descr); + if (tests[i].io_uring_support) + ASSERT_OK(run_test(cgroup_fd, &tests[i], 
true), + tests[i].descr); } close(cgroup_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93beb..18d451be57c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { "lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c index 4224727fb364..626d76fe43a2 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c +++ 
b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c @@ -30,8 +30,15 @@ void test_task_under_cgroup(void) if (!ASSERT_OK(ret, "test_task_under_cgroup__load")) goto cleanup; - ret = test_task_under_cgroup__attach(skel); - if (!ASSERT_OK(ret, "test_task_under_cgroup__attach")) + /* First, attach the LSM program, and then it will be triggered when the + * TP_BTF program is attached. + */ + skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(skel->links.lsm_run, "attach_lsm")) + goto cleanup; + + skel->links.tp_btf_run = bpf_program__attach_trace(skel->progs.tp_btf_run); + if (!ASSERT_OK_PTR(skel->links.tp_btf_run, "attach_tp_btf")) goto cleanup; pid = fork(); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 67f985f7d215..924d0e25320c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -4,6 +4,10 @@ #define TC_HELPERS #include <test_progs.h> +#ifndef loopback +# define loopback 1 +#endif + static inline __u32 id_from_prog_fd(int fd) { struct bpf_prog_info prog_info = {}; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c new file mode 100644 index 000000000000..15ee7b2fc410 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -0,0 +1,687 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <net/if.h> +#include <test_progs.h> + +#define netkit_peer "nk0" +#define netkit_name "nk1" + +#define ping_addr_neigh 0x0a000002 /* 10.0.0.2 */ +#define ping_addr_noneigh 0x0a000003 /* 10.0.0.3 */ + +#include "test_tc_link.skel.h" +#include "netlink_helpers.h" +#include "tc_helpers.h" + +#define ICMP_ECHO 8 + +struct icmphdr { + __u8 type; + __u8 code; + __sum16 checksum; + struct { + __be16 id; + __be16 sequence; + } echo; +}; + +struct 
iplink_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; +}; + +static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, + bool same_netns) +{ + struct rtnl_handle rth = { .fd = -1 }; + struct iplink_req req = {}; + struct rtattr *linkinfo, *data; + const char *type = "netkit"; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name, + strlen(netkit_name)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + *ifindex = if_nametoindex(netkit_name); + + ASSERT_GT(*ifindex, 0, "retrieve_ifindex"); + ASSERT_OK(system("ip netns add foo"), "create netns"); + ASSERT_OK(system("ip link set dev " netkit_name " up"), + "up primary"); + ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"), + "addr primary"); + if (same_netns) { + ASSERT_OK(system("ip link set dev " netkit_peer " up"), + "up peer"); + ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), + "addr peer"); + } else { + ASSERT_OK(system("ip link set " netkit_peer " netns foo"), + "move peer"); + ASSERT_OK(system("ip netns exec foo ip link set dev " + netkit_peer " up"), "up peer"); + ASSERT_OK(system("ip netns exec foo ip addr add dev " + 
netkit_peer " 10.0.0.2/24"), "addr peer"); + } + return err; +} + +static void destroy_netkit(void) +{ + ASSERT_OK(system("ip link del dev " netkit_name), "del primary"); + ASSERT_OK(system("ip netns del foo"), "delete netns"); + ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex"); +} + +static int __send_icmp(__u32 dest) +{ + struct sockaddr_in addr; + struct icmphdr icmp; + int sock, ret; + + ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); + if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)")) + return ret; + + sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); + if (!ASSERT_GE(sock, 0, "icmp_socket")) + return -errno; + + ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, + netkit_name, strlen(netkit_name) + 1); + if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)")) + goto out; + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(dest); + + memset(&icmp, 0, sizeof(icmp)); + icmp.type = ICMP_ECHO; + icmp.echo.id = 1234; + icmp.echo.sequence = 1; + + ret = sendto(sock, &icmp, sizeof(icmp), 0, + (struct sockaddr *)&addr, sizeof(addr)); + if (!ASSERT_GE(ret, 0, "icmp_sendto")) + ret = -errno; + else + ret = 0; +out: + close(sock); + return ret; +} + +static int send_icmp(void) +{ + return __send_icmp(ping_addr_neigh); +} + +void serial_test_tc_netkit_basic(void) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + BPF_NETKIT_PEER), 0, 
"tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + ASSERT_NEQ(lid1, lid2, "link_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1); + + 
memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +static void serial_test_tc_netkit_multi_links_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[3], link_ids[3]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + target), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + target), 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + 
ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + ASSERT_NEQ(lid1, lid2, "link_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, 
"prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_multi_links(void) +{ + serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER); + serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER); +} + +static void serial_test_tc_netkit_multi_opts_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 pid1, pid2, fd1, fd2; + __u32 prog_ids[3]; + struct test_tc_link *skel; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + + pid1 = id_from_prog_fd(fd1); + pid2 = id_from_prog_fd(fd2); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + err = bpf_prog_attach_opts(fd1, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, 
sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_fd1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd2, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_fd1; + + assert_mprog_count_ifindex(ifindex, target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_fd2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + +cleanup_fd2: + err = bpf_prog_detach_opts(fd2, ifindex, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count_ifindex(ifindex, target, 1); +cleanup_fd1: + err = bpf_prog_detach_opts(fd1, ifindex, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count_ifindex(ifindex, target, 0); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_multi_opts(void) +{ + 
serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER); + serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER); +} + +void serial_test_tc_netkit_device(void) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, pid2, lid1; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex, ifindex2; + + err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, + &ifindex, true); + if (err) + return; + + ifindex2 = if_nametoindex(netkit_peer); + ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, + BPF_NETKIT_PEER), 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, + BPF_NETKIT_PRIMARY), 0, "tc3_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, 
BPF_NETKIT_PEER, 0); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq); + ASSERT_EQ(err, -EACCES, "prog_query_should_fail"); + + err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq); + ASSERT_EQ(err, -EACCES, "prog_query_should_fail"); + + link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +static void serial_test_tc_netkit_neigh_links_target(int mode, int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_netkit_opts, optl); + __u32 prog_ids[2], 
link_ids[2]; + __u32 pid1, lid1; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, false); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth"); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count_ifindex(ifindex, target, 1); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(ifindex, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, target, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_neigh_links(void) +{ + 
serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY); + serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c index ca506d2fcf58..196abf223465 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void) const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); LIBBPF_OPTS(bpf_prog_load_opts, opts); const size_t log_buf_sz = 256; - char *log_buf; + char log_buf[log_buf_sz]; int fd = -1; - log_buf = malloc(log_buf_sz); - if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) - return fd; opts.log_buf = log_buf; opts.log_size = log_buf_sz; @@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void) prog_insns, prog_insn_cnt, &opts); ASSERT_STREQ(log_buf, "", "log_0"); ASSERT_GE(fd, 0, "prog_fd"); - free(log_buf); return fd; } @@ -2471,7 +2467,7 @@ static void test_tc_opts_query_target(int target) __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; struct test_tc_link *skel; union bpf_attr attr; - __u32 prog_ids[5]; + __u32 prog_ids[10]; int err; skel = test_tc_link__open_and_load(); @@ -2599,6 +2595,135 @@ static void test_tc_opts_query_target(int target) ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + /* Test 3: Query with smaller prog_ids array */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + attr.query.count = 2; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + ASSERT_EQ(err, -1, "prog_query_should_fail"); + ASSERT_EQ(errno, ENOSPC, "prog_query_should_fail"); + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + 
ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + /* Test 4: Query with larger prog_ids array */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + attr.query.count = 10; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + /* Test 5: Query with NULL prog_ids array but with count > 0 */ + memset(&attr, 
0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.count = sizeof(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + /* Test 6: Query with non-NULL prog_ids array but with count == 0 */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, 
"prog_ids[4]"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + /* Test 7: Query with invalid flags */ + attr.query.attach_flags = 0; + attr.query.query_flags = 1; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + ASSERT_EQ(err, -1, "prog_query_should_fail"); + ASSERT_EQ(errno, EINVAL, "prog_query_should_fail"); + + attr.query.attach_flags = 1; + attr.query.query_flags = 0; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + ASSERT_EQ(err, -1, "prog_query_should_fail"); + ASSERT_EQ(errno, EINVAL, "prog_query_should_fail"); + cleanup4: err = bpf_prog_detach_opts(fd4, loopback, target, &optd); ASSERT_OK(err, "prog_detach"); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 6ee22c3b251a..518f143c5b0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -24,6 +24,7 @@ #include "test_progs.h" #include "network_helpers.h" +#include "netlink_helpers.h" #include "test_tc_neigh_fib.skel.h" #include "test_tc_neigh.skel.h" #include "test_tc_peer.skel.h" @@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb) } } +enum dev_mode { + MODE_VETH, + MODE_NETKIT, +}; + struct netns_setup_result { - int ifindex_veth_src; - int ifindex_veth_src_fwd; - int ifindex_veth_dst; - int ifindex_veth_dst_fwd; + enum dev_mode dev_mode; + int ifindex_src; + int ifindex_src_fwd; + int ifindex_dst; + int ifindex_dst_fwd; }; static int get_ifaddr(const char *name, char *ifaddr) @@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } +static int create_netkit(int mode, char *prim, char *peer) +{ + struct rtattr *linkinfo, *data, *peer_info; + struct rtnl_handle 
rth = { .fd = -1 }; + const char *type = "netkit"; + struct { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[1024]; + } req = {}; + int err; + + err = rtnl_open(&rth, 0); + if (!ASSERT_OK(err, "open_rtnetlink")) + return err; + + memset(&req, 0, sizeof(req)); + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + req.i.ifi_family = AF_UNSPEC; + + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); + linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); + addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); + data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); + peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); + req.n.nlmsg_len += sizeof(struct ifinfomsg); + addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); + addattr_nest_end(&req.n, peer_info); + addattr_nest_end(&req.n, data); + addattr_nest_end(&req.n, linkinfo); + + err = rtnl_talk(&rth, &req.n, NULL); + ASSERT_OK(err, "talk_rtnetlink"); + rtnl_close(&rth); + return err; +} + static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; - char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - - SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); - SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); + char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + int err; - SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS(fail, "ip link set veth_dst address " MAC_DST); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip link add src type veth peer name src_fwd"); + SYS(fail, "ip link add dst type veth peer name dst_fwd"); + + SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set dst address " MAC_DST); + } else if (result->dev_mode == MODE_NETKIT) { + err = 
create_netkit(NETKIT_L3, "src", "src_fwd"); + if (!ASSERT_OK(err, "create_ifindex_src")) + goto fail; + err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); + if (!ASSERT_OK(err, "create_ifindex_dst")) + goto fail; + } - if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; - result->ifindex_veth_src = if_nametoindex("veth_src"); - if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src")) + result->ifindex_src = if_nametoindex("src"); + if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; - result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd"); - if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd")) + result->ifindex_src_fwd = if_nametoindex("src_fwd"); + if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd")) goto fail; - result->ifindex_veth_dst = if_nametoindex("veth_dst"); - if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst")) + result->ifindex_dst = if_nametoindex("dst"); + if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) goto fail; - result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd"); - if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) + result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); + if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) goto fail; - SYS(fail, "ip link set veth_src netns " NS_SRC); - SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); - SYS(fail, "ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set src netns " NS_SRC); + SYS(fail, "ip link set src_fwd netns " NS_FWD); + SYS(fail, "ip link set dst_fwd netns " NS_FWD); + SYS(fail, "ip link set dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); - SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS(fail, "ip 
link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); + SYS(fail, "ip link set dev src up"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); - SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); - SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", - veth_src_fwd_addr); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s", + src_fwd_addr); + SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", + src_fwd_addr); + } close_netns(nstoken); @@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. 
*/ - SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS(fail, "ip link set dev veth_src_fwd up"); - SYS(fail, "ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); + SYS(fail, "ip link set dev src_fwd up"); + SYS(fail, "ip link set dev dst_fwd up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); close_netns(nstoken); @@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); - SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS(fail, "ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); + SYS(fail, "ip link set dev dst up"); - SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); - SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip neigh add " IP6_SRC " dev 
veth_dst lladdr " MAC_DST_FWD); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); + } close_netns(nstoken); @@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog, const struct bpf_program *chk_prog, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); int err; - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress bpf da src_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0); - /* tc filter add dev veth_src_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress bpf da src_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); + /* tc filter add dev src_fwd egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); - /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da dst_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); + /* tc filter add dev dst_fwd 
egress bpf da chk_prog */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); return 0; fail: @@ -539,10 +600,10 @@ done: static int netns_load_dtime_bpf(struct test_tc_dtime *skel, const struct netns_setup_result *setup_result) { - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_src); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); struct nstoken *nstoken; int err; @@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel, nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) return -1; - /* tc qdisc add dev veth_src clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src); - /* tc filter add dev veth_src ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_src egress bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev src clsact */ + QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); + /* tc filter add dev src ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev src egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_dst tc progs */ nstoken = open_netns(NS_DST); if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) return -1; - /* tc qdisc add dev veth_dst clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst); - /* tc filter add dev veth_dst ingress bpf da ingress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); - /* tc filter add dev veth_dst egress 
bpf da egress_host */ - XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); + /* tc qdisc add dev dst clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); + /* tc filter add dev dst ingress bpf da ingress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); + /* tc filter add dev dst egress bpf da egress_host */ + XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); close_netns(nstoken); /* setup ns_fwd tc progs */ nstoken = open_netns(NS_FWD); if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) return -1; - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, + /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, + /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + 
XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); - /* tc qdisc add dev veth_src_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd); - /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc qdisc add dev src_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); + /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, + /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, skel->progs.ingress_fwdns_prio101, 101); - /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio100, 100); - /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ - XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, + /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ + XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, skel->progs.egress_fwdns_prio101, 101); close_netns(nstoken); return 0; @@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) return; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = 
setup_result->ifindex_dst_fwd; err = test_tc_dtime__load(skel); if (!ASSERT_OK(err, "test_tc_dtime__load")) @@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_neigh__load(skel); if (!ASSERT_OK(err, "test_tc_neigh__load")) @@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result) if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) goto done; - skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd) static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) { LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); - LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd); + LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); struct test_tc_peer *skel = NULL; struct nstoken *nstoken = NULL; int err; @@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) goto fail; skel->rodata->IFINDEX_SRC = ifindex; - skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; err = test_tc_peer__load(skel); if (!ASSERT_OK(err, "test_tc_peer__load")) @@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) /* Load "tc_src_l3" to the tun_fwd interface to redirect packets * towards dst, and "tc_dst" to redirect 
packets - * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + * and "tc_chk" on dst_fwd to drop non-redirected packets. */ /* tc qdisc add dev tun_fwd clsact */ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); - /* tc qdisc add dev veth_dst_fwd clsact */ - QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd); - /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); - /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */ - XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); + /* tc qdisc add dev dst_fwd clsact */ + QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); + /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); + /* tc filter add dev dst_fwd egress bpf da tc_chk */ + XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); @@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns 
" NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); - SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; @@ -1106,9 +1167,9 @@ fail: close_netns(nstoken); } -#define RUN_TEST(name) \ +#define RUN_TEST(name, mode) \ ({ \ - struct netns_setup_result setup_result; \ + struct netns_setup_result setup_result = { .dev_mode = mode, }; \ if (test__start_subtest(#name)) \ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ @@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg) { netns_setup_namespaces_nofail("delete"); - RUN_TEST(tc_redirect_peer); - RUN_TEST(tc_redirect_peer_l3); - RUN_TEST(tc_redirect_neigh); - RUN_TEST(tc_redirect_neigh_fib); - RUN_TEST(tc_redirect_dtime); + RUN_TEST(tc_redirect_peer, MODE_VETH); + RUN_TEST(tc_redirect_peer, MODE_NETKIT); + RUN_TEST(tc_redirect_peer_l3, MODE_VETH); + RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); + RUN_TEST(tc_redirect_neigh, MODE_VETH); + RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); + RUN_TEST(tc_redirect_dtime, MODE_VETH); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c index 
0cca4e8ae38e..d3491a84b3b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -9,9 +9,10 @@ #include "test_bpf_ma.skel.h" -void test_test_bpf_ma(void) +static void do_bpf_ma_test(const char *name) { struct test_bpf_ma *skel; + struct bpf_program *prog; struct btf *btf; int i, err; @@ -34,6 +35,11 @@ void test_test_bpf_ma(void) skel->rodata->data_btf_ids[i] = id; } + prog = bpf_object__find_program_by_name(skel->obj, name); + if (!ASSERT_OK_PTR(prog, "invalid prog name")) + goto out; + bpf_program__set_autoload(prog, true); + err = test_bpf_ma__load(skel); if (!ASSERT_OK(err, "load")) goto out; @@ -48,3 +54,15 @@ void test_test_bpf_ma(void) out: test_bpf_ma__destroy(skel); } + +void test_test_bpf_ma(void) +{ + if (test__start_subtest("batch_alloc_free")) + do_bpf_ma_test("test_batch_alloc_free"); + if (test__start_subtest("free_through_map_free")) + do_bpf_ma_test("test_free_through_map_free"); + if (test__start_subtest("batch_percpu_alloc_free")) + do_bpf_ma_test("test_batch_percpu_alloc_free"); + if (test__start_subtest("percpu_free_through_map_free")) + do_bpf_ma_test("test_percpu_free_through_map_free"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c index 214d9f4a94a5..ea933fd151c3 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -8,7 +8,8 @@ #include <sys/types.h> #include <test_progs.h> -#define TDIR "/sys/kernel/debug" +/* TDIR must be in a location we can create a directory in. */ +#define TDIR "/tmp/test_bpffs_testdir" static int read_iter(char *file) { @@ -43,8 +44,11 @@ static int fn(void) if (!ASSERT_OK(err, "mount /")) goto out; - err = umount(TDIR); - if (!ASSERT_OK(err, "umount " TDIR)) + err = mkdir(TDIR, 0777); + /* If the directory already exists we can carry on. It may be left over + * from a previous run. 
+ */ + if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR)) goto out; err = mount("none", TDIR, "tmpfs", 0, NULL); @@ -138,6 +142,7 @@ out: rmdir(TDIR "/fs1"); rmdir(TDIR "/fs2"); umount(TDIR); + rmdir(TDIR); exit(err); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e3e68c97b40c..5cfa7a6316b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -31,6 +31,7 @@ #include "verifier_helper_restricted.skel.h" #include "verifier_helper_value_access.skel.h" #include "verifier_int_ptr.skel.h" +#include "verifier_iterating_callbacks.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" @@ -46,6 +47,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" @@ -138,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); } void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } +void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } @@ -153,6 +156,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { 
RUN(verifier_netfilter_retcode); } +void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc6474..6fb2217d940b 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index f2b8167b72a8..442f4ca39fd7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -35,6 +35,8 @@ int dump_task_stack(struct 
bpf_iter__task *ctx) return 0; } +int num_user_stacks = 0; + SEC("iter/task") int get_task_user_stacks(struct bpf_iter__task *ctx) { @@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx) if (res <= 0) return 0; + /* Only one task, the current one, should succeed */ + ++num_user_stacks; + buf_sz += res; /* If the verifier doesn't refine bpf_get_task_stack res, and instead diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 96131b9a1caa..96131b9a1caa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c index 4ce76eb064c4..d461746fd3c1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data) return 0; } +static int outer_loop(__u32 index, void *data) +{ + bpf_loop(nr_loops, empty_callback, NULL, 0); + __sync_add_and_fetch(&hits, nr_loops); + return 0; +} + SEC("fentry/" SYS_PREFIX "sys_getpgid") int benchmark(void *ctx) { - for (int i = 0; i < 1000; i++) { - bpf_loop(nr_loops, empty_callback, NULL, 0); - - __sync_add_and_fetch(&hits, nr_loops); - } + bpf_loop(1000, outer_loop, NULL, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 76d661b20e87..56c764df8196 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -33,6 +33,7 @@ int underflow_prog(void *ctx) if (!p) return 0; bpf_for_each_map_elem(&array_map, cb1, &p, 0); + bpf_kfunc_call_test_release(p); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a11..49efaed143fc 100644 --- 
a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -18,48 +18,48 @@ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") +__msg(": R0_w=0xffffffff80000000 R10=fp0") check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") +__msg(": R0_w=0x7fffffff R10=fp0") check_assert(s64, eq, int_max, INT_MAX); __msg(": R0_w=0 R10=fp0") check_assert(s64, eq, zero, 0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0") check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0") check_assert(s64, eq, llong_max, LLONG_MAX); -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") +__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0") check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))") check_assert(s64, lt, neg, INT_MIN); -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") +__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0") check_assert(s64, le, pos, INT_MAX); __msg(": R0_w=scalar(smax=0) R10=fp0") check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 
0x7fffffffffffffff))") check_assert(s64, le, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0") check_assert(s64, gt, neg, INT_MIN); -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") +__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") +__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") +__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0") check_assert(s64, ge, neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct 
__sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 4c39e920dac2..8c0ef2742208 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx) return 0; bpf_spin_lock(&lock); bpf_rbtree_add(&rbtree, &f->node, rbless); + bpf_spin_unlock(&lock); return 0; } @@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx) if (!f) return 0; bpf_loop(5, subprog_cb_ref, NULL, 0); + bpf_obj_drop(f); return 0; } diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 6b9b3c56f009..b2181f850d3e 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -14,6 +14,13 @@ int my_pid; int arr[256]; int small_arr[16] SEC(".data.small_arr"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10); + __type(key, int); + 
__type(value, int); +} amap SEC(".maps"); + #ifdef REAL_TEST #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0 #else @@ -716,4 +723,714 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx) return 0; } +SEC("?raw_tp") +__failure +__msg("R1 type=scalar expected=fp") +__naked int delayed_read_mark(void) +{ + /* This is equivalent to C program below. + * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead. + * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read mark. + * Loop body with r7=0xdead would only be visited if verifier would decide to continue + * with second loop iteration. Absence of read mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r7 = &fp[-16] + * fp[-16] = 0 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r6++ + * if (r6 != 42) { + * r7 = 0xdead + * continue; + * } + * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r7 = r10;" + "r7 += -16;" + "r0 = 0;" + "*(u64 *)(r7 + 0) = r0;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "r6 += 1;" + "if r6 != 42 goto 3f;" + "r7 = 0xdead;" + "goto 1b;" + "3:" + "r1 = r7;" + "r2 = 8;" + "r3 = 0xdeadbeef;" + "call %[bpf_probe_read_user];" + "goto 1b;" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and 
register with unbounded") +__naked int delayed_precision_mark(void) +{ + /* This is equivalent to C program below. + * The test is similar to delayed_read_mark but verifies that incomplete + * precision marks don't fool the verifier. + * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32. + * State with r7=-16 is visited first and follows r6 != 42 ... continue branch. + * At this point iterator next() call is reached with r7 that has no read + * and precision marks. + * Loop body with r7=-32 would only be visited if verifier would decide to continue + * with second loop iteration. Absence of precision mark on r7 might affect state + * equivalent logic used for iterator convergence tracking. + * + * r8 = 0 + * fp[-16] = 0 + * r7 = -16 + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (r6 != 42) { + * r7 = -32 + * r6 = bpf_get_prandom_u32() + * continue; + * } + * r0 = r10 + * r0 += r7 + * r8 = *(u64 *)(r0 + 0) // this is not safe + * r6 = bpf_get_prandom_u32() + * } + * bpf_iter_num_destroy(&fp[-8]) + * return r8 + */ + asm volatile ( + "r8 = 0;" + "*(u64 *)(r10 - 16) = r8;" + "r7 = -16;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;\n" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "if r6 != 42 goto 3f;" + "r7 = -32;" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "3:" + "r0 = r10;" + "r0 += r7;" + "r8 = *(u64 *)(r0 + 0);" + "call %[bpf_get_prandom_u32];" + "r6 = r0;" + "goto 1b;\n" + "2:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = r8;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_probe_read_user) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") 
+__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps1(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with c=-25 are explored only on a second iteration + * of the outer loop; + * - states with read+precise mark on c are explored only on + * second iteration of the inner loop and in a state which + * is pushed to states stack first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. + * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * a = 0; + * b = 0; + * c = -25; + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + 
"call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r6 = 0;" + "r7 = 0;" + "r8 = -25;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps2(void) +{ + /* This is equivalent to C program below. + * + * The case turns out to be tricky in a sense that: + * - states with read+precise mark on c are explored only on a second + * iteration of the first inner loop and in a state which is pushed to + * states stack first. + * - states with c=-25 are explored only on a second iteration of the + * second inner loop and in a state which is pushed to states stack + * first. + * + * Depending on the details of iterator convergence logic + * verifier might stop states traversal too early and miss + * unsafe c=-25 memory access. 
+ * + * j = iter_new(); // fp[-16] + * a = 0; // r6 + * b = 0; // r7 + * c = -24; // r8 + * while (iter_next(j)) { + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * *(r10 + c) = 7; // this is not safe + * iter_destroy(i); + * iter_destroy(j); + * return; + * } + * } + * } + * iter_destroy(i); + * i = iter_new(); // fp[-8] + * a = 0; // r6 + * b = 0; // r7 + * while (iter_next(i)) { + * if (a == 1) { + * a = 0; + * b = 1; + * } else if (a == 0) { + * a = 1; + * if (random() == 42) + * continue; + * if (b == 1) { + * a = 0; + * c = -25; + * } + * } + * } + * iter_destroy(i); + * } + * iter_destroy(j); + * return; + */ + asm volatile ( + "r1 = r10;" + "r1 += -16;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "r8 = -24;" + "j_loop_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto j_loop_end_%=;" + + /* first inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i_loop_end_%=;" + "check_one_r6_%=:" + "if r6 != 1 goto check_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i_loop_%=;" + "check_zero_r6_%=:" + "if r6 != 0 goto i_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check_one_r7_%=;" + "goto i_loop_%=;" + "check_one_r7_%=:" + "if r7 != 1 goto i_loop_%=;" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "i_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + /* second inner loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call 
%[bpf_iter_num_new];" + "r6 = 0;" + "r7 = 0;" + "i2_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto i2_loop_end_%=;" + "check2_one_r6_%=:" + "if r6 != 1 goto check2_zero_r6_%=;" + "r6 = 0;" + "r7 = 1;" + "goto i2_loop_%=;" + "check2_zero_r6_%=:" + "if r6 != 0 goto i2_loop_%=;" + "r6 = 1;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto check2_one_r7_%=;" + "goto i2_loop_%=;" + "check2_one_r7_%=:" + "if r7 != 1 goto i2_loop_%=;" + "r6 = 0;" + "r8 = -25;" + "goto i2_loop_%=;" + "i2_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + + "r6 = 0;" + "r7 = 0;" + "goto j_loop_%=;" + "j_loop_end_%=:" + "r1 = r10;" + "r1 += -16;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int triple_continue(void) +{ + /* This is equivalent to C program below. + * High branching factor of the loop body turned out to be + * problematic for one of the iterator convergence tracking + * algorithms explored. 
+ * + * r6 = bpf_get_prandom_u32() + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * if (bpf_get_prandom_u32() != 42) + * continue; + * r0 += 0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto loop_%=;" + "r0 += 0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("?raw_tp") +__success +__naked int widen_spill(void) +{ + /* This is equivalent to C program below. + * The counter is stored in fp[-16], if this counter is not widened + * verifier states representing loop iterations would never converge. 
+ * + * fp[-16] = 0 + * bpf_iter_num_new(&fp[-8], 0, 10) + * while (bpf_iter_num_next(&fp[-8])) { + * r0 = fp[-16]; + * r0 += 1; + * fp[-16] = r0; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "r0 = *(u64 *)(r10 - 16);" + "r0 += 1;" + "*(u64 *)(r10 - 16) = r0;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + +SEC("raw_tp") +__success +__naked int checkpoint_states_deletion(void) +{ + /* This is equivalent to C program below. + * + * int *a, *b, *c, *d, *e, *f; + * int i, sum = 0; + * bpf_for(i, 0, 10) { + * a = bpf_map_lookup_elem(&amap, &i); + * b = bpf_map_lookup_elem(&amap, &i); + * c = bpf_map_lookup_elem(&amap, &i); + * d = bpf_map_lookup_elem(&amap, &i); + * e = bpf_map_lookup_elem(&amap, &i); + * f = bpf_map_lookup_elem(&amap, &i); + * if (a) sum += 1; + * if (b) sum += 1; + * if (c) sum += 1; + * if (d) sum += 1; + * if (e) sum += 1; + * if (f) sum += 1; + * } + * return 0; + * + * The body of the loop spawns multiple simulation paths + * with different combinations of NULL/non-NULL information for a/b/c/d/e/f. + * Each combination is unique from states_equal() point of view. + * Explored states checkpoint is created after each iterator next call. + * Iterator convergence logic expects that eventually current state + * would get equal to one of the explored states and thus loop + * exploration would be finished (at least for a specific path). + * Verifier evicts explored states with high miss to hit ratio + * to avoid comparing current state with too many explored + * states per instruction. 
+ * This test is designed to "stress test" eviction policy defined using formula: + * + * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted + * + * Currently N is set to 64, which allows for 6 variables in this test. + */ + asm volatile ( + "r6 = 0;" /* a */ + "r7 = 0;" /* b */ + "r8 = 0;" /* c */ + "*(u64 *)(r10 - 24) = r6;" /* d */ + "*(u64 *)(r10 - 32) = r6;" /* e */ + "*(u64 *)(r10 - 40) = r6;" /* f */ + "r9 = 0;" /* sum */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + + "*(u64 *)(r10 - 16) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r6 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r7 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "r8 = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 24) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 32) = r0;" + + "r1 = %[amap] ll;" + "r2 = r10;" + "r2 += -16;" + "call %[bpf_map_lookup_elem];" + "*(u64 *)(r10 - 40) = r0;" + + "if r6 == 0 goto +1;" + "r9 += 1;" + "if r7 == 0 goto +1;" + "r9 += 1;" + "if r8 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 24);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 32);" + "if r0 == 0 goto +1;" + "r9 += 1;" + "r0 = *(u64 *)(r10 - 40);" + "if r0 == 0 goto +1;" + "r9 += 1;" + + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_map_lookup_elem), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm_addr(amap) + : __clobber_all + ); +} + +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int 
iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.data) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c new file mode 100644 index 000000000000..ec1f6c2f590b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_css.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid; +u64 root_cg_id, leaf_cg_id; +u64 first_cg_id, last_cg_id; + +int pre_order_cnt, post_order_cnt, tree_high; + +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("fentry.s/" SYS_PREFIX "sys_getpgid") +int iter_css_for_each(const void *ctx) +{ + struct task_struct *cur_task = bpf_get_current_task_btf(); + struct cgroup_subsys_state *root_css, *leaf_css, *pos; + struct cgroup *root_cgrp, *leaf_cgrp, *cur_cgrp; + + if (cur_task->pid != target_pid) + return 0; + + root_cgrp = bpf_cgroup_from_id(root_cg_id); + + if (!root_cgrp) + return 0; + + leaf_cgrp = bpf_cgroup_from_id(leaf_cg_id); + + if (!leaf_cgrp) { + bpf_cgroup_release(root_cgrp); + return 0; + } + root_css = &root_cgrp->self; + leaf_css = &leaf_cgrp->self; + pre_order_cnt = post_order_cnt = tree_high = 0; + first_cg_id = last_cg_id = 0; + + bpf_rcu_read_lock(); + bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) { + cur_cgrp = pos->cgroup; + post_order_cnt++; + last_cg_id = cur_cgrp->kn->id; + } + + 
bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_PRE) { + cur_cgrp = pos->cgroup; + pre_order_cnt++; + if (!first_cg_id) + first_cg_id = cur_cgrp->kn->id; + } + + bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP) + tree_high++; + + bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_ANCESTORS_UP) + tree_high--; + bpf_rcu_read_unlock(); + bpf_cgroup_release(root_cgrp); + bpf_cgroup_release(leaf_cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c new file mode 100644 index 000000000000..9ac758649cb8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_css_task.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; + +pid_t target_pid; +int css_task_cnt; +u64 cg_id; + +SEC("lsm/file_mprotect") +int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma, + unsigned long reqprot, unsigned long prot, int ret) +{ + struct task_struct *cur_task = bpf_get_current_task_btf(); + struct cgroup_subsys_state *css; + struct task_struct *task; + struct cgroup *cgrp; + + if (cur_task->pid != target_pid) + return ret; + + cgrp = bpf_cgroup_from_id(cg_id); + + if (!cgrp) + return -EPERM; + + css = &cgrp->self; + css_task_cnt = 0; + + bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) + if (task->pid == target_pid) + css_task_cnt++; + + bpf_cgroup_release(cgrp); + + return -EPERM; +} + +static inline u64 cgroup_id(struct cgroup *cgrp) +{ + return cgrp->kn->id; +} + +SEC("?iter/cgroup") +int cgroup_id_printer(struct bpf_iter__cgroup *ctx) 
+{ + struct seq_file *seq = ctx->meta->seq; + struct cgroup *cgrp = ctx->cgroup; + struct cgroup_subsys_state *css; + struct task_struct *task; + + /* epilogue */ + if (cgrp == NULL) { + BPF_SEQ_PRINTF(seq, "epilogue\n"); + return 0; + } + + /* prologue */ + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "prologue\n"); + + BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp)); + + css = &cgrp->self; + css_task_cnt = 0; + bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) { + if (task->pid == target_pid) + css_task_cnt++; + } + + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int BPF_PROG(iter_css_task_for_each_sleep) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id); + struct cgroup_subsys_state *css; + struct task_struct *task; + + if (cgrp == NULL) + return 0; + css = &cgrp->self; + + bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) { + + } + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c new file mode 100644 index 000000000000..c9b4055cd410 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_task.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid; +int procs_cnt, threads_cnt, proc_threads_cnt; + +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("fentry.s/" SYS_PREFIX "sys_getpgid") +int iter_task_for_each_sleep(void *ctx) +{ + struct task_struct *cur_task = bpf_get_current_task_btf(); + struct task_struct *pos; + + if (cur_task->pid != target_pid) + return 0; + procs_cnt = threads_cnt = proc_threads_cnt = 0; + + bpf_rcu_read_lock(); + bpf_for_each(task, pos, NULL, 
BPF_TASK_ITER_ALL_PROCS) + if (pos->pid == target_pid) + procs_cnt++; + + bpf_for_each(task, pos, cur_task, BPF_TASK_ITER_PROC_THREADS) + proc_threads_cnt++; + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_THREADS) + if (pos->tgid == target_pid) + threads_cnt++; + bpf_rcu_read_unlock(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c new file mode 100644 index 000000000000..6b1588d70652 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("expected an RCU CS when using bpf_iter_task_next") +int BPF_PROG(iter_tasks_without_lock) +{ + struct task_struct *pos; + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) { + + } + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("expected an RCU CS when using bpf_iter_css_next") +int BPF_PROG(iter_css_without_lock) +{ + u64 cg_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp = bpf_cgroup_from_id(cg_id); + struct cgroup_subsys_state *root_css, *pos; + + if (!cgrp) + return 0; + root_css = &cgrp->self; + + bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) { + + } + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("expected an RCU CS when using bpf_iter_task_next") +int BPF_PROG(iter_tasks_lock_and_unlock) +{ + struct task_struct *pos; + + bpf_rcu_read_lock(); + 
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) { + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + } + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("expected an RCU CS when using bpf_iter_css_next") +int BPF_PROG(iter_css_lock_and_unlock) +{ + u64 cg_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp = bpf_cgroup_from_id(cg_id); + struct cgroup_subsys_state *root_css, *pos; + + if (!cgrp) + return 0; + root_css = &cgrp->self; + + bpf_rcu_read_lock(); + bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) { + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + } + bpf_rcu_read_unlock(); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_getpgid") +__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs") +int BPF_PROG(iter_css_task_for_each) +{ + u64 cg_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp = bpf_cgroup_from_id(cg_id); + struct cgroup_subsys_state *css; + struct task_struct *task; + + if (cgrp == NULL) + return 0; + css = &cgrp->self; + + bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) { + + } + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c index 44edecfdfaee..e085a51d153e 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_vma.c +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -30,6 +30,7 @@ int iter_task_vma_for_each(const void *ctx) bpf_for_each(task_vma, vma, task, 0) { if (seen >= 1000) break; + barrier_var(seen); vm_ranges[seen].vm_start = vma->vm_start; vm_ranges[seen].vm_end = vma->vm_end; diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c index f4c63daba229..6438982b928b 100644 --- a/tools/testing/selftests/bpf/progs/linked_list_fail.c +++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c @@ 
-591,7 +591,9 @@ int pop_ptr_off(void *(*op)(void *head)) n = op(&p->head); bpf_spin_unlock(&p->lock); - bpf_this_cpu_ptr(n); + if (!n) + return 0; + bpf_spin_lock((void *)n); return 0; } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b8..1769fdff6aea 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? 
s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c index c39f559d3100..42c4a8b62e36 100644 --- a/tools/testing/selftests/bpf/progs/pyperf180.c +++ b/tools/testing/selftests/bpf/progs/pyperf180.c @@ -1,4 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #define STACK_MAX_LEN 180 + +/* llvm upstream commit at clang18 + * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e + * changed inlining behavior and caused compilation failure as some branch + * target distance exceeded 16bit representation which is the maximum for + * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18 + * to specify which cpu version is used for compilation. So a smaller + * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which + * reduced some branch target distances and resolved the compilation failure. + * + * To capture the case where a developer/ci uses clang18 but the corresponding + * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count + * will be set as well to prevent potential compilation failures. 
+ */ +#ifdef __BPF_CPU_VERSION__ +#if __BPF_CPU_VERSION__ < 4 +#define UNROLL_COUNT 90 +#endif +#elif __clang_major__ == 18 +#define UNROLL_COUNT 90 +#endif + #include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580..1553b9c16aa7 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) } SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + +SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index e02cfd380746..40df2cc26eaf 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -24,9 +24,11 @@ struct task_struct {}; #define STACK_TABLE_EPOCH_SHIFT 20 #define STROBE_MAX_STR_LEN 1 #define STROBE_MAX_CFGS 32 +#define READ_MAP_VAR_PAYLOAD_CAP \ + ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) #define STROBE_MAX_PAYLOAD \ (STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \ - STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN) + STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP) struct strobe_value_header { /* @@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, struct strobe_value_generic *value, struct strobemeta_payload *data, - void *payload) + size_t off) { void *location; 
uint64_t len; @@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr); /* * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is @@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, return 0; data->str_lens[idx] = len; - return len; + return off + len; } -static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, - void *payload) +static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + size_t off) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, location = calc_location(&cfg->map_locs[idx], tls_base); if (!location) - return payload; + return off; bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) - return payload; + return off; descr->id = map.id; descr->cnt = map.cnt; @@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; - payload += len; + off += len; } #ifdef NO_UNROLL @@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct 
strobemeta_cfg *cfg, break; descr->key_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; - payload += len; + off += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; - payload += len; + off += len; } } - return payload; + return off; } #ifdef USE_BPF_LOOP @@ -455,14 +457,20 @@ struct read_var_ctx { struct strobemeta_payload *data; void *tls_base; struct strobemeta_cfg *cfg; - void *payload; + size_t payload_off; /* value gets mutated */ struct strobe_value_generic *value; enum read_type type; }; -static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +static int read_var_callback(__u64 index, struct read_var_ctx *ctx) { + /* lose precision info for ctx->payload_off, verifier won't track + * double xor, barrier_var() is needed to force clang keep both xors. 
+ */ + ctx->payload_off ^= index; + barrier_var(ctx->payload_off); + ctx->payload_off ^= index; switch (ctx->type) { case READ_INT_VAR: if (index >= STROBE_MAX_INTS) @@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx) case READ_MAP_VAR: if (index >= STROBE_MAX_MAPS) return 1; - ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP) + return 1; + ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; case READ_STR_VAR: if (index >= STROBE_MAX_STRS) return 1; - ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, - ctx->value, ctx->data, ctx->payload); + if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN) + return 1; + ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload_off); break; } return 0; @@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task, pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; - void *tls_base, *payload; + size_t payload_off; + void *tls_base; cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid); if (!cfg) @@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task, data->int_vals_set_mask = 0; data->req_meta_valid = 0; - payload = data->payload; + payload_off = 0; /* * we don't have struct task_struct definition, it should be: * tls_base = (void *)task->thread.fsbase; @@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task, .tls_base = tls_base, .value = &value, .data = data, - .payload = payload, + .payload_off = 0, }; int err; @@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task, err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); if (err != STROBE_MAX_MAPS) return NULL; + + payload_off = 
ctx.payload_off; + /* this should not really happen, here only to satisfy verifer */ + if (payload_off > sizeof(data->payload)) + payload_off = sizeof(data->payload); #else #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { - payload += read_str_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off); } #ifdef NO_UNROLL #pragma clang loop unroll(disable) @@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma unroll #endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { - payload = read_map_var(cfg, i, tls_base, &value, data, payload); + payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off); } #endif /* USE_BPF_LOOP */ @@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task, * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent */ - return payload; + return &data->payload[payload_off]; } SEC("raw_tracepoint/kfree_skb") diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index ecde41ae0fc8..b685a4aba6bd 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -37,10 +37,20 @@ int pid = 0; __type(key, int); \ __type(value, struct map_value_##_size); \ __uint(max_entries, 128); \ - } array_##_size SEC(".maps"); + } array_##_size SEC(".maps") -static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, - unsigned int idx) +#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ + struct map_value_percpu_##_size { \ + struct bin_data_##_size __percpu_kptr * data; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct 
map_value_percpu_##_size); \ + __uint(max_entries, 128); \ + } array_percpu_##_size SEC(".maps") + +static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx) { struct generic_map_value *value; unsigned int i, key; @@ -65,6 +75,14 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b return; } } +} + +static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old; + for (i = 0; i < batch; i++) { key = i; value = bpf_map_lookup_elem(map, &key); @@ -81,8 +99,72 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b } } +static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + /* per-cpu allocator may not be able to refill in time */ + new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) + continue; + + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_percpu_obj_drop(old); + err = 2; + return; + } + } +} + +static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 3; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) + continue; + bpf_percpu_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC(size, batch, idx) \ + batch_alloc((struct bpf_map *)(&array_##size), batch, idx) + #define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ - batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + do { \ + batch_alloc((struct bpf_map 
*)(&array_##size), batch, idx); \ + batch_free((struct bpf_map *)(&array_##size), batch, idx); \ + } while (0) + +#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \ + batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx) + +#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \ + do { \ + batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \ + batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ + } while (0) DEFINE_ARRAY_WITH_KPTR(8); DEFINE_ARRAY_WITH_KPTR(16); @@ -97,8 +179,21 @@ DEFINE_ARRAY_WITH_KPTR(1024); DEFINE_ARRAY_WITH_KPTR(2048); DEFINE_ARRAY_WITH_KPTR(4096); -SEC("fentry/" SYS_PREFIX "sys_nanosleep") -int test_bpf_mem_alloc_free(void *ctx) +/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ +DEFINE_ARRAY_WITH_PERCPU_KPTR(16); +DEFINE_ARRAY_WITH_PERCPU_KPTR(32); +DEFINE_ARRAY_WITH_PERCPU_KPTR(64); +DEFINE_ARRAY_WITH_PERCPU_KPTR(96); +DEFINE_ARRAY_WITH_PERCPU_KPTR(128); +DEFINE_ARRAY_WITH_PERCPU_KPTR(192); +DEFINE_ARRAY_WITH_PERCPU_KPTR(256); +DEFINE_ARRAY_WITH_PERCPU_KPTR(512); +DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); +DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); +DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +int test_batch_alloc_free(void *ctx) { if ((u32)bpf_get_current_pid_tgid() != pid) return 0; @@ -121,3 +216,76 @@ int test_bpf_mem_alloc_free(void *ctx) return 0; } + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +int test_free_through_map_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free these objects through map free. 
+ */ + CALL_BATCH_ALLOC(8, 128, 0); + CALL_BATCH_ALLOC(16, 128, 1); + CALL_BATCH_ALLOC(32, 128, 2); + CALL_BATCH_ALLOC(64, 128, 3); + CALL_BATCH_ALLOC(96, 128, 4); + CALL_BATCH_ALLOC(128, 128, 5); + CALL_BATCH_ALLOC(192, 128, 6); + CALL_BATCH_ALLOC(256, 128, 7); + CALL_BATCH_ALLOC(512, 64, 8); + CALL_BATCH_ALLOC(1024, 32, 9); + CALL_BATCH_ALLOC(2048, 16, 10); + CALL_BATCH_ALLOC(4096, 8, 11); + + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +int test_batch_percpu_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + * then free 128 16-bytes per-cpu objects in batch to trigger freeing. + */ + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); + CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); + + return 0; +} + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +int test_percpu_free_through_map_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + * then free these object through map free. 
+ */ + CALL_BATCH_PERCPU_ALLOC(16, 128, 1); + CALL_BATCH_PERCPU_ALLOC(32, 128, 2); + CALL_BATCH_PERCPU_ALLOC(64, 128, 3); + CALL_BATCH_PERCPU_ALLOC(96, 128, 4); + CALL_BATCH_PERCPU_ALLOC(128, 128, 5); + CALL_BATCH_PERCPU_ALLOC(192, 128, 6); + CALL_BATCH_PERCPU_ALLOC(256, 128, 7); + CALL_BATCH_PERCPU_ALLOC(512, 64, 8); + CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); + CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); + CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 000000000000..44628865fe1d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + bpf_cgroup_release(cgrp); + 
return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c index 3ddcb3777912..2a2a942737d7 100644 --- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -7,7 +7,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18 + defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 const volatile int skip = 0; #else const volatile int skip = 1; diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c index 56cdc0a553f0..7e750309ce27 100644 --- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c @@ -18,7 +18,7 @@ const volatile __u64 cgid; int remote_pid; SEC("tp_btf/task_newtask") -int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags) +int BPF_PROG(tp_btf_run, struct task_struct *task, u64 clone_flags) { struct cgroup *cgrp = NULL; struct task_struct *acquired; @@ -48,4 +48,30 @@ out: return 0; } +SEC("lsm.s/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + struct cgroup *cgrp = NULL; + struct task_struct *task; + int ret = 0; + + task = bpf_get_current_task_btf(); + if (local_pid != task->pid) + return 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + /* 1 is the root cgroup */ + cgrp = 
bpf_cgroup_from_id(1); + if (!cgrp) + goto out; + if (!bpf_task_under_cgroup(task, cgrp)) + ret = -1; + bpf_cgroup_release(cgrp); + +out: + return ret; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 30e7124c49a1..992400acb957 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Isovalent */ #include <stdbool.h> + #include <linux/bpf.h> +#include <linux/if_ether.h> + +#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> char LICENSE[] SEC("license") = "GPL"; @@ -12,10 +16,19 @@ bool seen_tc3; bool seen_tc4; bool seen_tc5; bool seen_tc6; +bool seen_eth; SEC("tc/ingress") int tc1(struct __sk_buff *skb) { + struct ethhdr eth = {}; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IP)) + goto out; + if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth))) + goto out; + seen_eth = eth.h_proto == bpf_htons(ETH_P_IP); +out: seen_tc1 = true; return TCX_NEXT; } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe2e..ec430b71730b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ diff --git
a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c index 107525fb4a6a..e61755656e8d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bswap.c +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c index df7697b94007..c1f55e1d80a4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cfg.c +++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c @@ -97,4 +97,66 @@ l0_%=: r2 = r0; \ " ::: __clobber_all); } +SEC("socket") +__description("conditional loop (2)") +__success +__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11") +__naked void conditional_loop2(void) +{ + asm volatile (" \ + r9 = 2 ll; \ + r3 = 0x20 ll; \ + r4 = 0x35 ll; \ + r8 = r4; \ + goto l1_%=; \ +l0_%=: r9 -= r3; \ + r9 -= r4; \ + r9 -= r8; \ +l1_%=: r8 += r4; \ + if r8 < 0x64 goto l0_%=; \ + r0 = r9; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2") +__naked void uncond_loop_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 = 1; \ + goto l0_%=; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + + +__naked __noinline __used +static unsigned long never_ending_subprog() +{ + asm volatile (" \ + r0 = r1; \ + goto -1; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unconditional loop after conditional jump") +/* infinite loop is detected *after* check_cfg() */ 
+__failure __msg("infinite loop detected") +__naked void uncond_loop_in_subprog_after_cond_jmp(void) +{ + asm volatile (" \ + r0 = 0; \ + if r0 > 0 goto l1_%=; \ +l0_%=: r0 += 1; \ + call never_ending_subprog; \ +l1_%=: exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index 9f202eda952f..d1edbcff9a18 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c new file mode 100644 index 000000000000..5905e036e0ea --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 8); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_USER_RINGBUF); + __uint(max_entries, 8); +} ringbuf SEC(".maps"); + +struct vm_area_struct; +struct bpf_map; + +struct buf_context { + char *buf; +}; + +struct num_context { + __u64 i; + __u64 j; +}; + +__u8 choice_arr[2] = { 0, 1 }; + +static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx) +{ + if (idx == 0) { + ctx->buf = (char *)(0xDEAD); + return 0; + } + + if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE))) + return 1; + + return 0; +} + 
+SEC("?raw_tp") +__failure __msg("R1 type=scalar expected=fp") +int unsafe_on_2nd_iter(void *unused) +{ + char buf[4]; + struct buf_context loop_ctx = { .buf = buf }; + + bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0); + return 0; +} + +static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i = 0; + return 0; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_on_zero_iter(void *unused) +{ + struct num_context loop_ctx = { .i = 32 }; + + bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int widening_cb(__u32 idx, struct num_context *ctx) +{ + ++ctx->i; + return 0; +} + +SEC("?raw_tp") +__success +int widening(void *unused) +{ + struct num_context loop_ctx = { .i = 0, .j = 1 }; + + bpf_loop(100, widening_cb, &loop_ctx, 0); + /* loop_ctx.j is not changed during callback iteration, + * verifier should not apply widening to it. + */ + return choice_arr[loop_ctx.j]; +} + +static int loop_detection_cb(__u32 idx, struct num_context *ctx) +{ + for (;;) {} + return 0; +} + +SEC("?raw_tp") +__failure __msg("infinite loop detected") +int loop_detection(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_loop(100, loop_detection_cb, &loop_ctx, 0); + return 0; +} + +static __always_inline __u64 oob_state_machine(struct num_context *ctx) +{ + switch (ctx->i) { + case 0: + ctx->i = 1; + break; + case 1: + ctx->i = 32; + break; + } + return 0; +} + +static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_for_each_map_elem(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void 
*data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_ringbuf_drain(void *unused) +{ + struct num_context loop_ctx = { .i = 0 }; + + bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data) +{ + return oob_state_machine(data); +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=32 size=1") +int unsafe_find_vma(void *unused) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct num_context loop_ctx = { .i = 0 }; + + bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0); + return choice_arr[loop_ctx.i]; +} + +static int iter_limit_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i++; + return 0; +} + +SEC("?raw_tp") +__success +int bpf_loop_iter_limit_ok(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(1, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +SEC("?raw_tp") +__failure __msg("invalid access to map value, value_size=2 off=2 size=1") +int bpf_loop_iter_limit_overflow(void *unused) +{ + struct num_context ctx = { .i = 0 }; + + bpf_loop(2, iter_limit_cb, &ctx, 0); + return choice_arr[ctx.i]; +} + +static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 100; + return 0; +} + +static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 10; + return 0; +} + +static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx) +{ + ctx->i += 1; + bpf_loop(1, iter_limit_level2a_cb, ctx, 0); + bpf_loop(1, iter_limit_level2b_cb, ctx, 0); + return 0; +} + +/* Check that path visiting every callback function once had been + * reached by verifier. Variables 'ctx{1,2}i' below serve as flags, + * with each decimal digit corresponding to a callback visit marker. 
+ */ +SEC("socket") +__success __retval(111111) +int bpf_loop_iter_limit_nested(void *unused) +{ + struct num_context ctx1 = { .i = 0 }; + struct num_context ctx2 = { .i = 0 }; + __u64 a, b, c; + + bpf_loop(1, iter_limit_level1_cb, &ctx1, 0); + bpf_loop(1, iter_limit_level1_cb, &ctx2, 0); + a = ctx1.i; + b = ctx2.i; + /* Force 'ctx1.i' and 'ctx2.i' precise. */ + c = choice_arr[(a + b) % 2]; + /* This makes 'c' zero, but neither clang nor verifier know it. */ + c /= 10; + /* Make sure that verifier does not visit 'impossible' states: + * enumerate all possible callback visit masks. + */ + if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 && + b != 0 && b != 1 && b != 11 && b != 101 && b != 111) + asm volatile ("r0 /= 0;" ::: "r0"); + return 1000 * a + b + c; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c index 375525329637..d4427d8e1217 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 5bc86af80a9a..71735dbf33d4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -75,9 +75,10 @@ l0_%=: r0 += 1; \ " ::: __clobber_all); } -SEC("tracepoint") +SEC("socket") __description("bounded loop, start in the middle") -__failure __msg("back-edge") +__success +__failure_unpriv __msg_unpriv("back-edge") __naked void loop_start_in_the_middle(void) { asm 
volatile (" \ @@ -136,7 +137,9 @@ l0_%=: exit; \ SEC("tracepoint") __description("bounded recursion") -__failure __msg("back-edge") +__failure +/* verifier limitation in detecting max stack depth */ +__msg("the call stack of 8 frames is too deep !") __naked void bounded_recursion(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index b2a04d1179d0..cbb9d6714f53 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c new file mode 100644 index 000000000000..6b564d4c0986 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 SUSE LLC */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2") +__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2") +__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8") +__naked int bpf_neg(void) +{ + asm volatile ( + "r2 = 8;" + "r2 = -r2;" + "if r2 != -8 goto 1f;" + "r1 = r10;" + "r1 += r2;" + "1:" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10") 
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2") +__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2") +__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0") +__naked int bpf_end_to_le(void) +{ + asm volatile ( + "r2 = 0;" + "r2 = le16 r2;" + "if r2 != 0 goto 1f;" + "r1 = r10;" + "r1 += r2;" + "1:" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2") +__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2") +__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0") +__naked int bpf_end_to_be(void) +{ + asm volatile ( + "r2 = 0;" + "r2 = be16 r2;" + "if r2 != 0 goto 1f;" + "r1 = r10;" + "r1 += r2;" + "1:" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + __clang_major__ >= 18 + +SEC("?raw_tp") +__success __log_level(2) +__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2") +__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2") +__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0") +__naked int bpf_end_bswap(void) +{ + asm volatile ( + "r2 = 0;" + "r2 = bswap16 r2;" + "if r2 != 0 goto 1f;" + "r1 = r10;" + "r1 += r2;" + "1:" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +#endif /* v4 instruction */ + +SEC("?raw_tp") +__success __log_level(2) +/* + * Without the bug fix there will be no history between "last_idx 3 first_idx 3" + * and "parent state regs=" lines. 
"R0_w=6" parts are here to help anchor + * expected log messages to the one specific mark_chain_precision operation. + * + * This is quite fragile: if verifier checkpointing heuristic changes, this + * might need adjusting. + */ +__msg("2: (07) r0 += 1 ; R0_w=6") +__msg("3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1") +__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4") +__msg("3: R0_w=6") +__naked int state_loop_first_last_equal(void) +{ + asm volatile ( + "r0 = 0;" + "l0_%=:" + "r0 += 1;" + "r0 += 1;" + /* every few iterations we'll have a checkpoint here with + * first_idx == last_idx, potentially confusing precision + * backtracking logic + */ + "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */ + "goto l0_%=;" + "l1_%=:" + "exit;" + ::: __clobber_common + ); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c index 8fc5174808b2..2a2271cf0294 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -6,7 +6,8 @@ #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ - defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ __clang_major__ >= 18 SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index db6b3143338b..f61d623b1ce8 100644 --- 
a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body, + * r1 and r4 are always precise for bpf_loop() calls. + */ +__msg("9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r4 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: parent state regs=r1 stack=:") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") +__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* r6 precision propagation */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: last_idx 14 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") -__msg("mark_precise: frame0: parent state regs=r0 stack=:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop") +/* State entering callback body popped from states stack */ 
+__msg("from 9 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +__msg("frame 0: propagating r1,r4") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit") +__msg("from 18 to 9: safe") __naked int callback_result_precise(void) { asm volatile ( @@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("12: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: last_idx 12 first_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs=r6 stack=:") -__msg("mark_precise: frame0: last_idx 16 first_idx 0") -__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9") __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 9 to 15: frame1:") +__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("15: (b7) r0 = 0") +__msg("16: (95) exit") +__msg("returning from callee:") +__msg("to caller at 9:") +/* r1, r4 are 
always precise for bpf_loop(), + * r6 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 16 to 9: safe") __naked int parent_callee_saved_reg_precise_with_callback(void) { asm volatile ( @@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void) SEC("?raw_tp") __success __log_level(2) +/* First simulated path does not include callback body */ __msg("14: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop") __msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 18 first_idx 0") -__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") -__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") -__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10") __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") __msg("mark_precise: frame0: regs=r6 
stack= before 4: (b7) r6 = 3") +/* State entering callback body popped from states stack */ +__msg("from 10 to 17: frame1:") +__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb") +__msg("17: (b7) r0 = 0") +__msg("18: (95) exit") +__msg("returning from callee:") +__msg("to caller at 10:") +/* r1, r4 are always precise for bpf_loop(), + * fp-8 was marked before backtracking to callback body. + */ +__msg("frame 0: propagating r1,r4,fp-8") +__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1") +__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: parent state regs= stack=:") +__msg("from 18 to 10: safe") __naked int parent_stack_slot_precise_with_callback(void) { asm volatile ( diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index b2dfd7066c6e..f6d1cc9ad892 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -21,7 +21,7 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; -SEC("xdp") +SEC("xdp.frags") int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 07d786329105..80f620602d50 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -53,6 +53,8 @@ #define DEFAULT_TTL 64 #define MAX_ALLOWED_PORTS 8 +#define MAX_PACKET_OFF 0xffff + #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) @@ -177,69 +179,82 @@ static __always_inline __u32 
tcp_ns_to_ts(__u64 ns) return ns / (NSEC_PER_SEC / TCP_TS_HZ); } -static __always_inline __u32 tcp_time_stamp_raw(void) +static __always_inline __u32 tcp_clock_ms(void) { return tcp_ns_to_ts(tcp_clock_ns()); } struct tcpopt_context { - __u8 *ptr; - __u8 *end; + void *data; void *data_end; __be32 *tsecr; __u8 wscale; bool option_timestamp; bool option_sack; + __u32 off; }; -static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) { - __u8 opcode, opsize; + __u64 off = ctx->off; + __u8 *data; - if (ctx->ptr >= ctx->end) - return 1; - if (ctx->ptr >= ctx->data_end) - return 1; + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ + if (off > MAX_PACKET_OFF - sz) + return NULL; - opcode = ctx->ptr[0]; + data = ctx->data + off; + barrier_var(data); + if (data + sz >= ctx->data_end) + return NULL; - if (opcode == TCPOPT_EOL) - return 1; - if (opcode == TCPOPT_NOP) { - ++ctx->ptr; - return 0; - } + ctx->off += sz; + return data; +} - if (ctx->ptr + 1 >= ctx->end) - return 1; - if (ctx->ptr + 1 >= ctx->data_end) +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 *opcode, *opsize, *wscale, *tsecr; + __u32 off = ctx->off; + + opcode = next(ctx, 1); + if (!opcode) return 1; - opsize = ctx->ptr[1]; - if (opsize < 2) + + if (*opcode == TCPOPT_EOL) return 1; + if (*opcode == TCPOPT_NOP) + return 0; - if (ctx->ptr + opsize > ctx->end) + opsize = next(ctx, 1); + if (!opsize || *opsize < 2) return 1; - switch (opcode) { + switch (*opcode) { case TCPOPT_WINDOW: - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) - ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + wscale = next(ctx, 1); + if (!wscale) + return 1; + if (*opsize == TCPOLEN_WINDOW) + ctx->wscale = *wscale < TCP_MAX_WSCALE ? 
*wscale : TCP_MAX_WSCALE; break; case TCPOPT_TIMESTAMP: - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + tsecr = next(ctx, 4); + if (!tsecr) + return 1; + if (*opsize == TCPOLEN_TIMESTAMP) { ctx->option_timestamp = true; /* Client's tsval becomes our tsecr. */ - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + *ctx->tsecr = get_unaligned((__be32 *)tsecr); } break; case TCPOPT_SACK_PERM: - if (opsize == TCPOLEN_SACK_PERM) + if (*opsize == TCPOLEN_SACK_PERM) ctx->option_sack = true; break; } - ctx->ptr += opsize; + ctx->off = off + *opsize; return 0; } @@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context) static __always_inline bool tscookie_init(struct tcphdr *tcp_header, __u16 tcp_len, __be32 *tsval, - __be32 *tsecr, void *data_end) + __be32 *tsecr, void *data, void *data_end) { struct tcpopt_context loop_ctx = { - .ptr = (__u8 *)(tcp_header + 1), - .end = (__u8 *)tcp_header + tcp_len, + .data = data, .data_end = data_end, .tsecr = tsecr, .wscale = TS_OPT_WSCALE_MASK, .option_timestamp = false, .option_sack = false, + /* Note: currently verifier would track .off as unbound scalar. + * In case if verifier would at some point get smarter and + * compute bounded value for this var, beware that it might + * hinder bpf_loop() convergence validation. 
+ */ + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, }; u32 cookie; @@ -274,7 +294,7 @@ static __always_inline bool tscookie_init(struct tcphdr *tcp_header, if (!loop_ctx.option_timestamp) return false; - cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie = tcp_clock_ms() & ~TSMASK; cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; if (loop_ctx.option_sack) cookie |= TS_OPT_SACK; @@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, cookie = (__u32)value; if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, - &tsopt_buf[0], &tsopt_buf[1], data_end)) + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) tsopt = tsopt_buf; /* Check that there is enough space for a SYNACK. It also covers diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28a1..a350ecdfba4a 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. 
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if 
(err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } } } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7fc00e423e4d..767e0693df10 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1396,13 +1396,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca761732..e4ac704a536c 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #define CHECK(condition, tag, format...) 
({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc62c..b0068a9d2cfe 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452d3..f36e41435be7 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7b4..d2458c1b1671 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + 
flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 2a6efbd0401e..b6d016461fb0 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -4,9 +4,40 @@ #include <stdlib.h> #include <error.h> #include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> #include "unpriv_helpers.h" +static bool get_mitigations_off(void) +{ + char cmdline[4096], *c; + int fd, ret = false; + + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) { + perror("open /proc/cmdline"); + return false; + } + + if (read(fd, cmdline, sizeof(cmdline) - 1) < 0) { + perror("read /proc/cmdline"); + goto out; + } + + cmdline[sizeof(cmdline) - 1] = '\0'; + for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) { + if (strncmp(c, "mitigations=off", strlen(c))) + continue; + ret = true; + break; + } +out: + close(fd); + return ret; +} + bool get_unpriv_disabled(void) { bool disabled; @@ -22,5 +53,5 @@ bool get_unpriv_disabled(void) disabled = true; } - return disabled; + return disabled ? 
true : get_mitigations_off(); } diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c index 3af2501082b2..b616575c3b00 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c +++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c @@ -65,3 +65,35 @@ .expected_attach_type = BPF_SK_LOOKUP, .runs = -1, }, +{ + "BPF_ST_MEM stack imm sign", + /* Check if verifier correctly reasons about sign of an + * immediate spilled to stack by BPF_ST instruction. + * + * fp[-8] = -44; + * r0 = fp[-8]; + * if r0 s< 0 goto ret0; + * r0 = -1; + * exit; + * ret0: + * r0 = 0; + * exit; + */ + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + /* Use prog type that requires return value in range [0, 1] */ + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .result = VERBOSE_ACCEPT, + .runs = -1, + .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\ + 2: (c5) if r0 s< 0x0 goto pc+2\ + R0_w=-44", +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 1bdf2b43e49e..3d5cd51071f0 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -442,7 +442,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge from insn 0 to 0", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -799,7 +799,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = REJECT, }, { @@ -811,7 +811,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, - .errstr = "back-edge", + .errstr = "the call stack of 9 frames is too deep", .result = 
REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index f9297900cea6..78f19c255f20 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -9,8 +9,8 @@ BPF_MOV64_IMM(BPF_REG_0, 2), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { @@ -23,8 +23,8 @@ BPF_LD_IMM64(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .errstr = "invalid BPF_LD_IMM insn", - .errstr_unpriv = "R1 pointer comparison", + .errstr = "jump into the middle of ldimm64 insn 1", + .errstr_unpriv = "jump into the middle of ldimm64 insn 1", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 655095810d4a..1d418d66e375 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include <libelf.h> #include <gelf.h> #include <float.h> +#include <math.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -142,10 +145,12 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -210,8 +215,7 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { 
"log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -219,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.force_reg_invariants = true; + break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } case 'C': env.comparison_mode = true; break; @@ -455,7 +473,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, 
op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +678,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* |<stat>| means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == '|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +752,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; 
specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; @@ -1103,7 +1136,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? -1 : 1; break; @@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2) int i, cmp; for (i = 0; i < env.sort_spec.spec_cnt; i++) { - cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], + env.sort_spec.asc[i], env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, static int cmp_join_stat(const struct verif_stats_join *s1, const struct verif_stats_join *s2, - enum stat_id id, enum stat_variant var, bool asc) + enum stat_id id, enum stat_variant var, + bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; double v1, v2; @@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1, fetch_join_stat_value(s1, id, var, &str1, &v1); fetch_join_stat_value(s2, id, var, &str2, &v2); + if (abs) { + v1 = fabs(v1); + v2 = fabs(v2); + } + if (str1) cmp = strcmp(str1, str2); else if (v1 != v2) @@ -1237,7 +1282,8 @@ static 
int cmp_join_stats(const void *v1, const void *v2) cmp = cmp_join_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.variants[i], - env.sort_spec.asc[i]); + env.sort_spec.asc[i], + env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + if (f->abs) + value = fabs(value); + switch (f->op) { case OP_EQ: return value > f->value - eps && value < f->value + eps; case OP_NEQ: return value < f->value - eps || value > f->value + eps; @@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void) struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j, last_idx; + int err, i, j, last_idx, cnt; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } - /* now sort joined results accorsing to sort spec */ + /* now sort joined results according to sort spec */ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); /* for human-readable table output we need to do extra pass to @@ -1896,16 +1945,22 @@ one_more_time: output_comp_headers(cur_fmt); last_idx = -1; + cnt = 0; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; if (!should_output_join_stats(join)) continue; + if (env.top_n && cnt >= env.top_n) + break; + if (cur_fmt == RESFMT_TABLE_CALCLEN) last_idx = i; output_comp_stats(join, cur_fmt, i == last_idx); + + cnt++; } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s { long value = stats->stats[f->stat_id]; + if (f->abs) + value = value < 0 ? 
-value : value; + switch (f->op) { case OP_EQ: return value == f->value; case OP_NEQ: return value != f->value; @@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats) static void output_prog_stats(void) { const struct verif_stats *stats; - int i, last_stat_idx = 0; + int i, last_stat_idx = 0, cnt = 0; if (env.out_fmt == RESFMT_TABLE) { /* calculate column widths */ @@ -1984,7 +2042,10 @@ static void output_prog_stats(void) stats = &env.prog_stats[i]; if (!should_output_stats(stats)) continue; + if (env.top_n && cnt >= env.top_n) + break; output_stats(stats, env.out_fmt, i == last_stat_idx); + cnt++; } } diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 685034528018..65d14f3bbe30 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" + "tools/testing/selftests/bpf/config.vm" + "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 17c980138796..c3ba40d0b9de 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -26,6 +26,7 @@ #include <linux/sockios.h> #include <sys/mman.h> #include <net/if.h> +#include <ctype.h> #include <poll.h> #include <time.h> @@ -47,6 +48,7 @@ struct xsk { }; struct xdp_hw_metadata *bpf_obj; +__u16 bind_flags = XDP_COPY; struct xsk *rx_xsk; const char *ifname; int ifindex; @@ -60,7 +62,7 @@ static int open_xsk(int ifindex, struct xsk 
*xsk, __u32 queue_id) const struct xsk_socket_config socket_config = { .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, - .bind_flags = XDP_COPY, + .bind_flags = bind_flags, }; const struct xsk_umem_config umem_config = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, @@ -263,11 +265,14 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t verify_skb_metadata(server_fd); for (i = 0; i < rxq; i++) { + bool first_seg = true; + bool is_eop = true; + if (fds[i].revents == 0) continue; struct xsk *xsk = &rx_xsk[i]; - +peek: ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); printf("xsk_ring_cons__peek: %d\n", ret); if (ret != 1) @@ -276,12 +281,19 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); comp_addr = xsk_umem__extract_addr(rx_desc->addr); addr = xsk_umem__add_offset_to_addr(rx_desc->addr); - printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", - xsk, idx, rx_desc->addr, addr, comp_addr); - verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), - clock_id); + is_eop = !(rx_desc->options & XDP_PKT_CONTD); + printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n", + xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? 
" EoP" : ""); + if (first_seg) { + verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), + clock_id); + first_seg = false; + } + xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); + if (!is_eop) + goto peek; } } @@ -404,6 +416,53 @@ static void timestamping_enable(int fd, int val) error(1, errno, "setsockopt(SO_TIMESTAMPING)"); } +static void print_usage(void) +{ + const char *usage = + "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" + " -m Enable multi-buffer XDP for larger MTU\n" + " -h Display this help and exit\n\n" + "Generate test packets on the other machine with:\n" + " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; + + printf("%s", usage); +} + +static void read_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "mh")) != -1) { + switch (opt) { + case 'm': + bind_flags |= XDP_USE_SG; + break; + case 'h': + print_usage(); + exit(0); + case '?': + if (isprint(optopt)) + fprintf(stderr, "Unknown option: -%c\n", optopt); + fallthrough; + default: + print_usage(); + error(-1, opterr, "Command line options error"); + } + } + + if (optind >= argc) { + fprintf(stderr, "No device name provided\n"); + print_usage(); + exit(-1); + } + + ifname = argv[optind]; + ifindex = if_nametoindex(ifname); + + if (!ifname) + error(-1, errno, "Invalid interface name"); +} + int main(int argc, char *argv[]) { clockid_t clock_id = CLOCK_TAI; @@ -413,13 +472,8 @@ int main(int argc, char *argv[]) struct bpf_program *prog; - if (argc != 2) { - fprintf(stderr, "pass device name\n"); - return -1; - } + read_args(argc, argv); - ifname = argv[1]; - ifindex = if_nametoindex(ifname); rxq = rxq_num(ifname); printf("rxq: %d\n", rxq); diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 591ca9637b23..b604c570309a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 
addr) struct xdp_info *meta = data - sizeof(struct xdp_info); if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->pkt_nb, meta->count); + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); return false; } @@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp if (addr >= umem->num_frames * umem->frame_size || addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); return false; } @@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", addr); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); return TEST_FAILURE; } @@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject) } if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n", - __func__, stats.tx_invalid_descs, + ksft_print_msg("[%s] tx_invalid_descs incorrect. 
Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, ifobject->xsk->pkt_stream->nb_pkts); return TEST_FAILURE; } diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index 4804c7dc7b31..b171fd53b004 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -27,7 +27,7 @@ static const char * const dev_files[] = { void print_cachestat(struct cachestat *cs) { ksft_print_msg( - "Using cachestat: Cached: %lu, Dirty: %lu, Writeback: %lu, Evicted: %lu, Recently Evicted: %lu\n", + "Using cachestat: Cached: %llu, Dirty: %llu, Writeback: %llu, Evicted: %llu, Recently Evicted: %llu\n", cs->nr_cache, cs->nr_dirty, cs->nr_writeback, cs->nr_evicted, cs->nr_recently_evicted); } diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile index 6e9d98d457d5..411ac098308f 100644 --- a/tools/testing/selftests/capabilities/Makefile +++ b/tools/testing/selftests/capabilities/Makefile @@ -2,7 +2,7 @@ TEST_GEN_FILES := validate_cap TEST_GEN_PROGS := test_execve -CFLAGS += -O2 -g -std=gnu99 -Wall +CFLAGS += -O2 -g -std=gnu99 -Wall $(KHDR_INCLUDES) LDLIBS += -lcap-ng -lrt -ldl include ../lib.mk diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index df0ef02b4036..e3a352b020a7 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -20,14 +20,6 @@ #include "../kselftest.h" -#ifndef PR_CAP_AMBIENT -#define PR_CAP_AMBIENT 47 -# define PR_CAP_AMBIENT_IS_SET 1 -# define PR_CAP_AMBIENT_RAISE 2 -# define PR_CAP_AMBIENT_LOWER 3 -# define PR_CAP_AMBIENT_CLEAR_ALL 4 -#endif - static int nerrs; static pid_t mpid; /* main() pid is used to avoid duplicate test counts */ diff --git a/tools/testing/selftests/capabilities/validate_cap.c 
b/tools/testing/selftests/capabilities/validate_cap.c index cdfc94268fe6..60b4e7b716a7 100644 --- a/tools/testing/selftests/capabilities/validate_cap.c +++ b/tools/testing/selftests/capabilities/validate_cap.c @@ -9,14 +9,6 @@ #include "../kselftest.h" -#ifndef PR_CAP_AMBIENT -#define PR_CAP_AMBIENT 47 -# define PR_CAP_AMBIENT_IS_SET 1 -# define PR_CAP_AMBIENT_RAISE 2 -# define PR_CAP_AMBIENT_LOWER 3 -# define PR_CAP_AMBIENT_CLEAR_ALL 4 -#endif - #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19) # define HAVE_GETAUXVAL #endif diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index af8c3f30b9c1..2732e0b29271 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -7,4 +7,5 @@ test_kill test_cpu test_cpuset test_zswap +test_hugetlb_memcg wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index c27f05f6ce9b..00b441928909 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_cpu TEST_GEN_PROGS += test_cpuset TEST_GEN_PROGS += test_zswap +TEST_GEN_PROGS += test_hugetlb_memcg LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -27,3 +28,4 @@ $(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c $(OUTPUT)/test_cpuset: cgroup_util.c $(OUTPUT)/test_zswap: cgroup_util.c +$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 4afb132e4e4f..a6e9848189d6 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -3,7 +3,7 @@ # # Test for cpuset v2 partition root state (PRS) # -# The sched verbose flag is set, if available, so that the console log +# The sched verbose flag can be optionally 
set so that the console log # can be examined for the correct setting of scheduling domain. # @@ -22,27 +22,27 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify # Find cgroup v2 mount point CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') [[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!" +SUBPARTS_CPUS=$CGROUP2/.__DEBUG__.cpuset.cpus.subpartitions +CPULIST=$(cat $CGROUP2/cpuset.cpus.effective) -CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") -[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" +NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//") +[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!" # Set verbose flag and delay factor PROG=$1 -VERBOSE= +VERBOSE=0 DELAY_FACTOR=1 SCHED_DEBUG= while [[ "$1" = -* ]] do case "$1" in - -v) VERBOSE=1 + -v) ((VERBOSE++)) # Enable sched/verbose can slow thing down [[ $DELAY_FACTOR -eq 1 ]] && DELAY_FACTOR=2 - break ;; -d) DELAY_FACTOR=$2 shift - break ;; *) echo "Usage: $PROG [-v] [-d <delay-factor>" exit @@ -52,7 +52,7 @@ do done # Set sched verbose flag if available when "-v" option is specified -if [[ -n "$VERBOSE" && -d /sys/kernel/debug/sched ]] +if [[ $VERBOSE -gt 0 && -d /sys/kernel/debug/sched ]] then # Used to restore the original setting during cleanup SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose) @@ -61,14 +61,26 @@ fi cd $CGROUP2 echo +cpuset > cgroup.subtree_control + +# +# If cpuset has been set up and used in child cgroups, we may not be able to +# create partition under root cgroup because of the CPU exclusivity rule. +# So we are going to skip the test if this is the case. +# [[ -d test ]] || mkdir test -cd test +echo 0-6 > test/cpuset.cpus +echo root > test/cpuset.cpus.partition +cat test/cpuset.cpus.partition | grep -q invalid +RESULT=$? +echo member > test/cpuset.cpus.partition +echo "" > test/cpuset.cpus +[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!" 
cleanup() { online_cpus + cd $CGROUP2 rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - cd .. rmdir test > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose @@ -103,7 +115,7 @@ test_partition() [[ $? -eq 0 ]] || exit 1 ACTUAL_VAL=$(cat cpuset.cpus.partition) [[ $ACTUAL_VAL != $EXPECTED_VAL ]] && { - echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $EXPECTED_VAL" + echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL" echo "Test FAILED" exit 1 } @@ -114,7 +126,7 @@ test_effective_cpus() EXPECTED_VAL=$1 ACTUAL_VAL=$(cat cpuset.cpus.effective) [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && { - echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$EXPECTED_VAL'" + echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'" echo "Test FAILED" exit 1 } @@ -139,6 +151,7 @@ test_add_proc() # test_isolated() { + cd $CGROUP2/test echo 2-3 > cpuset.cpus TYPE=$(cat cpuset.cpus.partition) [[ $TYPE = member ]] || echo member > cpuset.cpus.partition @@ -203,125 +216,220 @@ test_isolated() # # Cgroup test hierarchy # -# test -- A1 -- A2 -- A3 -# \- B1 +# root -- A1 -- A2 -- A3 +# +- B1 # -# P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid) -# C<l> = add cpu-list +# P<v> = set cpus.partition (0:member, 1:root, 2:isolated) +# C<l> = add cpu-list to cpuset.cpus +# X<l> = add cpu-list to cpuset.cpus.exclusive # S<p> = use prefix in subtree_control # T = put a task into cgroup -# O<c>-<v> = Write <v> to CPU online file of <c> +# O<c>=<v> = Write <v> to CPU online file of <c> # SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1" TEST_MATRIX=( - # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate - # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ - " S+ C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" - " S+ C0-1 . . C2-3 P1 . . . 0 " - " S+ C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " - " S+ C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " - " S+ C0-1:S+ . . 
C2-3 . . . P1 0 " - " S+ C0-1:P1 . . C2-3 S+ C1 . . 0 " - " S+ C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " - " S+ C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " - " S+ C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" - " S+ C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" - " S+ C0-1 . . C2-3:P1 . . . C2 0 " - " S+ C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - " S+ C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - " S+ C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - " S+ C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - " S+ C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - " S+ C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - " S+ C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - " S+ $SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS + # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- + " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1" + " C0-1 . . C2-3 P1 . . . 0 " + " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 " + " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 " + " C0-1:S+ . . C2-3 . . . P1 0 " + " C0-1:P1 . . C2-3 S+ C1 . . 0 " + " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 " + " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 " + " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5" + " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" + " C0-1 . . C2-3:P1 . . . C2 0 " + " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" # CPU offlining cases: - " S+ C0-1 . . C2-3 S+ C4-5 . O2-0 0 A1:0-1,B1:3" - " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 . . . 0 A1:0-1,A2:3" - " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 O2-1 . . 
0 A1:0-1,A2:2-3" - " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 . . . 0 A1:0,A2:2-3" - " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 O1-1 . . 0 A1:0-1,A2:2-3" - " S+ C2-3:P1:S+ C3:P1 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - " S+ C2-3:P1:S+ C3:P2 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - " S+ C2-3:P1:S+ C3:P1 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - " S+ C2-3:P1:S+ C3:P2 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - " S+ C2-3:P1:S+ C3:P1 . . O2-0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - " S+ C2-3:P1:S+ C3:P1 . . O3-0 . . . 0 A1:2,A2: A1:P1,A2:P1" - " S+ C2-3:P1:S+ C3:P1 . . T:O2-0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - " S+ C2-3:P1:S+ C3:P1 . . . T:O3-0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - " S+ $SETUP_A123_PARTITIONS . O1-0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . O2-0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . O3-0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . T:O1-0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - " S+ $SETUP_A123_PARTITIONS . . T:O2-0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - " S+ $SETUP_A123_PARTITIONS . T:O1-0 O1-1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . . T:O2-0 O2-1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 O3-1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O1-1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O2-1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - - # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate - # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 
0 A1:0-1,A2:2-3" + "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS + # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- + # + # Remote partition and cpuset.cpus.exclusive tests + # + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 
0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" + + # Nested remote/local partition tests + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ + A1:P0,A2:P1,A3:P2,B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ + A1:P0,A2:P1,A3:P2,B1:P1 2-4" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ + A1:P0,A2:P1,A3:P2,B1:P1 2-4" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ + A1:P0,A2:P2,A3:P1 2-4" + " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ + A1:P0,A2:P-2,A3:P-1 ." + " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ + . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ + A1:P0,A2:P2,A3:P-1 2-4" + + # Remote partition offline tests + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3" + + # An invalidated remote partition cannot self-recover from hotplug + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + + # cpus.exclusive.effective clearing test + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + + # Invalid to valid remote partition transition test + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3:X3:P2 + . . X2-3 P2 . . 
0 A1:0-2,A2:3,XA2:3 A2:P2 3" + + # Invalid to valid local partition direct transition tests + " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3" + " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" + " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + + # Local partition invalidation tests + " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ + . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + # Local partition CPU change tests + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + + # cpus_allowed/exclusive_cpus update tests + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ + A1:P0,A3:P-2 ." + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ + A1:P0,A3:P-2 ." + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ + A1:P0,A3:P2 3" + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ + . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ + A1:P0,A3:P2 3" + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ + A1:P0,A3:P-2 ." + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ + A1:P0,A3:P-2 ." + " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ + . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ + A1:P0,A3:P-2 ." 
+ + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS + # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Incorrect change to cpuset.cpus invalidates partition root # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - " S+ C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " S+ C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" # Changing a partition root to member makes child partitions invalid - " S+ C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - " S+ $SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - " S+ C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - " S+ C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. 
- " S+ C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" # Enabling partition with child cpusets is allowed - " S+ C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" # A partition root with non-partition root parent is invalid, but it # can be made valid if its parent becomes a partition root too. - " S+ C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " S+ C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " S+ C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " S+ C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " S+ C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" - # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate - # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS + # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - " S+ C2-3:P1:S+ C3:P1 . . O2-0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + + # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected + " C0-3 . . C4-5 X0-3 . . 
X3-5 1 A1:0-3,B1:4-5" ) # # Write to the cpu online file -# $1 - <c>-<v> where <c> = cpu number, <v> value to be written +# $1 - <c>=<v> where <c> = cpu number, <v> value to be written # write_cpu_online() { - CPU=${1%-*} - VAL=${1#*-} + CPU=${1%=*} + VAL=${1#*=} CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online if [[ $VAL -eq 0 ]] then @@ -349,11 +457,12 @@ set_ctrl_state() TMPMSG=/tmp/.msg_$$ CGRP=$1 STATE=$2 - SHOWERR=${3}${VERBOSE} + SHOWERR=${3} CTRL=${CTRL:=$CONTROLLER} HASERR=0 REDIRECT="2> $TMPMSG" [[ -z "$STATE" || "$STATE" = '.' ]] && return 0 + [[ $VERBOSE -gt 0 ]] && SHOWERR=1 rm -f $TMPMSG for CMD in $(echo $STATE | sed -e "s/:/ /g") @@ -362,12 +471,18 @@ set_ctrl_state() SFILE=$CGRP/cgroup.subtree_control PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus + XFILE=$CGRP/cpuset.cpus.exclusive S=$(expr substr $CMD 1 1) if [[ $S = S ]] then PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT + elif [[ $S = X ]] + then + CPUS=${CMD#?} + COMM="echo $CPUS > $XFILE" + eval $COMM $REDIRECT elif [[ $S = C ]] then CPUS=${CMD#?} @@ -430,7 +545,7 @@ online_cpus() [[ -n "OFFLINE_CPUS" ]] && { for C in $OFFLINE_CPUS do - write_cpu_online ${C}-1 + write_cpu_online ${C}=1 done } } @@ -443,18 +558,27 @@ reset_cgroup_states() echo 0 > $CGROUP2/cgroup.procs online_cpus rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - set_ctrl_state . S- + pause 0.02 + set_ctrl_state . R- pause 0.01 } dump_states() { - for DIR in A1 A1/A2 A1/A2/A3 B1 + for DIR in . 
A1 A1/A2 A1/A2/A3 B1 do + CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective + XCPUS=$DIR/cpuset.cpus.exclusive + XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition - [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" - [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" + PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions + [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" + [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" + [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" + [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)" + [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" + [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)" done } @@ -470,11 +594,17 @@ check_effective_cpus() set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 CPUS=$2 + if [[ $CGRP = X* ]] + then + CGRP=${CGRP#X} + FILE=cpuset.cpus.exclusive.effective + else + FILE=cpuset.cpus.effective + fi [[ $CGRP = A2 ]] && CGRP=A1/A2 [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - FILE=$CGRP/cpuset.cpus.effective - [[ -e $FILE ]] || return 1 - [[ $CPUS = $(cat $FILE) ]] || return 1 + [[ -e $CGRP/$FILE ]] || return 1 + [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 done } @@ -525,6 +655,65 @@ check_cgroup_states() } # +# Get isolated (including offline) CPUs by looking at +# /sys/kernel/debug/sched/domains and compare that with the expected value. +# +# Note that a sched domain of just 1 CPU will be considered isolated. +# +# $1 - expected isolated cpu list +# +check_isolcpus() +{ + EXPECT_VAL=$1 + ISOLCPUS= + LASTISOLCPU= + SCHED_DOMAINS=/sys/kernel/debug/sched/domains + [[ -d $SCHED_DOMAINS ]] || return 0 + [[ $EXPECT_VAL = . ]] && EXPECT_VAL= + + for ((CPU=0; CPU < $NR_CPUS; CPU++)) + do + [[ -n "$(ls ${SCHED_DOMAINS}/cpu$CPU)" ]] && continue + + if [[ -z "$LASTISOLCPU" ]] + then + ISOLCPUS=$CPU + LASTISOLCPU=$CPU + elif [[ "$LASTISOLCPU" -eq $((CPU - 1)) ]] + then + echo $ISOLCPUS | grep -q "\<$LASTISOLCPU\$" + if [[ $? 
-eq 0 ]] + then + ISOLCPUS=${ISOLCPUS}- + fi + LASTISOLCPU=$CPU + else + if [[ $ISOLCPUS = *- ]] + then + ISOLCPUS=${ISOLCPUS}$LASTISOLCPU + fi + ISOLCPUS=${ISOLCPUS},$CPU + LASTISOLCPU=$CPU + fi + done + [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU + [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] +} + +test_fail() +{ + TESTNUM=$1 + TESTTYPE=$2 + ADDINFO=$3 + echo "Test $TEST[$TESTNUM] failed $TESTTYPE check!" + [[ -n "$ADDINFO" ]] && echo "*** $ADDINFO ***" + eval echo \${$TEST[$I]} + echo + dump_states + exit 1 +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -536,88 +725,83 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset - CPULIST=0-6 I=0 eval CNT="\${#$TEST[@]}" reset_cgroup_states - echo $CPULIST > cpuset.cpus - echo root > cpuset.cpus.partition console_msg "Running state transition test ..." while [[ $I -lt $CNT ]] do echo "Running test $I ..." > /dev/console + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} + } eval set -- "\${$TEST[$I]}" - ROOT=$1 - OLD_A1=$2 - OLD_A2=$3 - OLD_A3=$4 - OLD_B1=$5 - NEW_A1=$6 - NEW_A2=$7 - NEW_A3=$8 - NEW_B1=$9 - RESULT=${10} - ECPUS=${11} - STATES=${12} - - set_ctrl_state_noerr . $ROOT + OLD_A1=$1 + OLD_A2=$2 + OLD_A3=$3 + OLD_B1=$4 + NEW_A1=$5 + NEW_A2=$6 + NEW_A3=$7 + NEW_B1=$8 + RESULT=$9 + ECPUS=${10} + STATES=${11} + ICPUS=${12} + + set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 - set_ctrl_state_noerr B1 $OLD_B1 RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?)) set_ctrl_state B1 $NEW_B1; ((RETVAL += $?)) - [[ $RETVAL -ne $RESULT ]] && { - echo "Test $TEST[$I] failed result check!" - eval echo \"\${$TEST[$I]}\" - dump_states - exit 1 - } + [[ $RETVAL -ne $RESULT ]] && test_fail $I result [[ -n "$ECPUS" && "$ECPUS" != . ]] && { check_effective_cpus $ECPUS - [[ $? 
-ne 0 ]] && { - echo "Test $TEST[$I] failed effective CPU check!" - eval echo \"\${$TEST[$I]}\" - echo - dump_states - exit 1 - } + [[ $? -ne 0 ]] && test_fail $I "effective CPU" } - [[ -n "$STATES" ]] && { + [[ -n "$STATES" && "$STATES" != . ]] && { check_cgroup_states $STATES - [[ $? -ne 0 ]] && { - echo "FAILED: Test $TEST[$I] failed states check!" - eval echo \"\${$TEST[$I]}\" - echo - dump_states - exit 1 - } + [[ $? -ne 0 ]] && test_fail $I states } + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$ICPUS" ]] && { + check_isolcpus $ICPUS + [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \ + "Expect $ICPUS, get $ISOLCPUS instead" + } reset_cgroup_states # # Check to see if effective cpu list changes # - pause 0.05 NEWLIST=$(cat cpuset.cpus.effective) + RETRY=0 + while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]] + do + # Wait a bit longer & recheck a few times + pause 0.01 + ((RETRY++)) + NEWLIST=$(cat cpuset.cpus.effective) + done [[ $NEWLIST != $CPULIST ]] && { echo "Effective cpus changed to $NEWLIST after test $I!" exit 1 } - [[ -n "$VERBOSE" ]] && echo "Test $I done." + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." ((I++)) done echo "All $I tests of $TEST PASSED." - - echo member > cpuset.cpus.partition } # @@ -642,6 +826,7 @@ test_inotify() { ERR=0 PRS=/tmp/.prs_$$ + cd $CGROUP2/test [[ -f $WAIT_INOTIFY ]] || { echo "wait_inotify not found, inotify test SKIPPED." return @@ -655,7 +840,7 @@ test_inotify() rm -f $PRS wait_inotify $PWD/cpuset.cpus.partition $PRS & pause 0.01 - set_ctrl_state . "O1-0" + set_ctrl_state . "O1=0" pause 0.01 check_cgroup_states ".:P-1" if [[ $? -ne 0 ]] @@ -689,5 +874,3 @@ run_state_test TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." -cd .. 
-rmdir test diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c new file mode 100644 index 000000000000..f0fefeb4cc24 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <sys/mman.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include "../kselftest.h" +#include "cgroup_util.h" + +#define ADDR ((void *)(0x0UL)) +#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) +/* mapping 8 MBs == 4 hugepages */ +#define LENGTH (8UL*1024*1024) +#define PROTECTION (PROT_READ | PROT_WRITE) + +/* borrowed from mm/hmm-tests.c */ +static long get_hugepage_size(void) +{ + int fd; + char buf[2048]; + int len; + char *p, *q, *path = "/proc/meminfo", *tag = "Hugepagesize:"; + long val; + + fd = open(path, O_RDONLY); + if (fd < 0) { + /* Error opening the file */ + return -1; + } + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) { + /* Error in reading the file */ + return -1; + } + if (len == sizeof(buf)) { + /* Error file is too large */ + return -1; + } + buf[len] = '\0'; + + /* Search for a tag if provided */ + if (tag) { + p = strstr(buf, tag); + if (!p) + return -1; /* looks like the line we want isn't there */ + p += strlen(tag); + } else + p = buf; + + val = strtol(p, &q, 0); + if (*q != ' ') { + /* Error parsing the file */ + return -1; + } + + return val; +} + +static int set_file(const char *path, long value) +{ + FILE *file; + int ret; + + file = fopen(path, "w"); + if (!file) + return -1; + ret = fprintf(file, "%ld\n", value); + fclose(file); + return ret; +} + +static int set_nr_hugepages(long value) +{ + return set_file("/proc/sys/vm/nr_hugepages", value); +} + +static unsigned int check_first(char *addr) +{ + return *(unsigned int *)addr; +} + +static void write_data(char *addr) +{ + unsigned long i; + + for (i = 
0; i < LENGTH; i++) + *(addr + i) = (char)i; +} + +static int hugetlb_test_program(const char *cgroup, void *arg) +{ + char *test_group = (char *)arg; + void *addr; + long old_current, expected_current, current; + int ret = EXIT_FAILURE; + + old_current = cg_read_long(test_group, "memory.current"); + set_nr_hugepages(20); + current = cg_read_long(test_group, "memory.current"); + if (current - old_current >= MB(2)) { + ksft_print_msg( + "setting nr_hugepages should not increase hugepage usage.\n"); + ksft_print_msg("before: %ld, after: %ld\n", old_current, current); + return EXIT_FAILURE; + } + + addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0); + if (addr == MAP_FAILED) { + ksft_print_msg("fail to mmap.\n"); + return EXIT_FAILURE; + } + current = cg_read_long(test_group, "memory.current"); + if (current - old_current >= MB(2)) { + ksft_print_msg("mmap should not increase hugepage usage.\n"); + ksft_print_msg("before: %ld, after: %ld\n", old_current, current); + goto out_failed_munmap; + } + old_current = current; + + /* read the first page */ + check_first(addr); + expected_current = old_current + MB(2); + current = cg_read_long(test_group, "memory.current"); + if (!values_close(expected_current, current, 5)) { + ksft_print_msg("memory usage should increase by around 2MB.\n"); + ksft_print_msg( + "expected memory: %ld, actual memory: %ld\n", + expected_current, current); + goto out_failed_munmap; + } + + /* write to the whole range */ + write_data(addr); + current = cg_read_long(test_group, "memory.current"); + expected_current = old_current + MB(8); + if (!values_close(expected_current, current, 5)) { + ksft_print_msg("memory usage should increase by around 8MB.\n"); + ksft_print_msg( + "expected memory: %ld, actual memory: %ld\n", + expected_current, current); + goto out_failed_munmap; + } + + /* unmap the whole range */ + munmap(addr, LENGTH); + current = cg_read_long(test_group, "memory.current"); + expected_current = old_current; + if 
(!values_close(expected_current, current, 5)) { + ksft_print_msg("memory usage should go back down.\n"); + ksft_print_msg( + "expected memory: %ld, actual memory: %ld\n", + expected_current, current); + return ret; + } + + ret = EXIT_SUCCESS; + return ret; + +out_failed_munmap: + munmap(addr, LENGTH); + return ret; +} + +static int test_hugetlb_memcg(char *root) +{ + int ret = KSFT_FAIL; + char *test_group; + + test_group = cg_name(root, "hugetlb_memcg_test"); + if (!test_group || cg_create(test_group)) { + ksft_print_msg("fail to create cgroup.\n"); + goto out; + } + + if (cg_write(test_group, "memory.max", "100M")) { + ksft_print_msg("fail to set cgroup memory limit.\n"); + goto out; + } + + /* disable swap */ + if (cg_write(test_group, "memory.swap.max", "0")) { + ksft_print_msg("fail to disable swap.\n"); + goto out; + } + + if (!cg_run(test_group, hugetlb_test_program, (void *)test_group)) + ret = KSFT_PASS; +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int ret = EXIT_SUCCESS, has_memory_hugetlb_acc; + + has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting"); + if (has_memory_hugetlb_acc < 0) + ksft_exit_skip("Failed to query cgroup mount option\n"); + else if (!has_memory_hugetlb_acc) + ksft_exit_skip("memory hugetlb accounting is disabled\n"); + + /* Unit is kB! 
*/ + if (get_hugepage_size() != 2048) { + ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n"); + ksft_test_result_skip("test_hugetlb_memcg\n"); + return ret; + } + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + switch (test_hugetlb_memcg(root)) { + case KSFT_PASS: + ksft_test_result_pass("test_hugetlb_memcg\n"); + break; + case KSFT_SKIP: + ksft_test_result_skip("test_hugetlb_memcg\n"); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("test_hugetlb_memcg\n"); + break; + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 49def87a909b..c99d2adaca3f 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -55,6 +55,11 @@ static int get_zswap_written_back_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/written_back_pages", value); } +static long get_zswpout(const char *cgroup) +{ + return cg_read_key_long(cgroup, "memory.stat", "zswpout "); +} + static int allocate_bytes(const char *cgroup, void *arg) { size_t size = (size_t)arg; @@ -69,6 +74,48 @@ static int allocate_bytes(const char *cgroup, void *arg) } /* + * Sanity test to check that pages are written into zswap. 
+ */ +static int test_zswap_usage(const char *root) +{ + long zswpout_before, zswpout_after; + int ret = KSFT_FAIL; + char *test_group; + + /* Set up */ + test_group = cg_name(root, "no_shrink_test"); + if (!test_group) + goto out; + if (cg_create(test_group)) + goto out; + if (cg_write(test_group, "memory.max", "1M")) + goto out; + + zswpout_before = get_zswpout(test_group); + if (zswpout_before < 0) { + ksft_print_msg("Failed to get zswpout\n"); + goto out; + } + + /* Allocate more than memory.max to push memory into zswap */ + if (cg_run(test_group, allocate_bytes, (void *)MB(4))) + goto out; + + /* Verify that pages come into zswap */ + zswpout_after = get_zswpout(test_group); + if (zswpout_after <= zswpout_before) { + ksft_print_msg("zswpout does not increase after test program\n"); + goto out; + } + ret = KSFT_PASS; + +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +/* * When trying to store a memcg page in zswap, if the memcg hits its memory * limit in zswap, writeback should not be triggered. 
* @@ -235,6 +282,7 @@ struct zswap_test { int (*fn)(const char *root); const char *name; } tests[] = { + T(test_zswap_usage), T(test_no_kmem_bypass), T(test_no_invasive_cgroup_shrink), }; diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index e60cf4da8fb0..3c9bf0cd82a8 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -7,6 +7,7 @@ #include <inttypes.h> #include <linux/types.h> #include <linux/sched.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -103,8 +104,8 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) return 0; } -static void test_clone3(uint64_t flags, size_t size, int expected, - enum test_mode test_mode) +static bool test_clone3(uint64_t flags, size_t size, int expected, + enum test_mode test_mode) { int ret; @@ -114,92 +115,223 @@ static void test_clone3(uint64_t flags, size_t size, int expected, ret = call_clone3(flags, size, test_mode); ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n", getpid(), ret, expected); - if (ret != expected) - ksft_test_result_fail( + if (ret != expected) { + ksft_print_msg( "[%d] Result (%d) is different than expected (%d)\n", getpid(), ret, expected); - else - ksft_test_result_pass( - "[%d] Result (%d) matches expectation (%d)\n", - getpid(), ret, expected); -} - -int main(int argc, char *argv[]) -{ - uid_t uid = getuid(); - - ksft_print_header(); - ksft_set_plan(19); - test_clone3_supported(); - - /* Just a simple clone3() should return 0.*/ - test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST); - - /* Do a clone3() in a new PID NS.*/ - if (uid == 0) - test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST); - else - ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - - /* Do a clone3() with CLONE_ARGS_SIZE_VER0. 
*/ - test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); + return false; + } - /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */ - test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + return true; +} - /* Do a clone3() with sizeof(struct clone_args) + 8 */ - test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); +typedef bool (*filter_function)(void); +typedef size_t (*size_function)(void); - /* Do a clone3() with exit_signal having highest 32 bits non-zero */ - test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); +static bool not_root(void) +{ + if (getuid() != 0) { + ksft_print_msg("Not running as root\n"); + return true; + } - /* Do a clone3() with negative 32-bit exit_signal */ - test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG); + return false; +} - /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */ - test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG); +static bool no_timenamespace(void) +{ + if (not_root()) + return true; - /* Do a clone3() with NSIG < exit_signal < CSIG */ - test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); + if (!access("/proc/self/ns/time", F_OK)) + return false; - test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + ksft_print_msg("Time namespaces are not supported\n"); + return true; +} - test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG, - CLONE3_ARGS_ALL_0); +static size_t page_size_plus_8(void) +{ + return getpagesize() + 8; +} - test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG, - CLONE3_ARGS_ALL_0); +struct test { + const char *name; + uint64_t flags; + size_t size; + size_function size_function; + int expected; + enum test_mode test_mode; + filter_function filter; +}; - /* Do a clone3() with > page size */ - test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); +static const struct test tests[] = { + { + .name = "simple clone3()", + .flags = 0, + .size = 0, + .expected = 0, + 
.test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "clone3() in a new PID_NS", + .flags = CLONE_NEWPID, + .size = 0, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + .filter = not_root, + }, + { + .name = "CLONE_ARGS_SIZE_VER0", + .flags = 0, + .size = CLONE_ARGS_SIZE_VER0, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "CLONE_ARGS_SIZE_VER0 - 8", + .flags = 0, + .size = CLONE_ARGS_SIZE_VER0 - 8, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "sizeof(struct clone_args) + 8", + .flags = 0, + .size = sizeof(struct __clone_args) + 8, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "exit_signal with highest 32 bits non-zero", + .flags = 0, + .size = 0, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG, + }, + { + .name = "negative 32-bit exit_signal", + .flags = 0, + .size = 0, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG, + }, + { + .name = "exit_signal not fitting into CSIGNAL mask", + .flags = 0, + .size = 0, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG, + }, + { + .name = "NSIG < exit_signal < CSIG", + .flags = 0, + .size = 0, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG, + }, + { + .name = "Arguments sizeof(struct clone_args) + 8", + .flags = 0, + .size = sizeof(struct __clone_args) + 8, + .expected = 0, + .test_mode = CLONE3_ARGS_ALL_0, + }, + { + .name = "Arguments sizeof(struct clone_args) + 16", + .flags = 0, + .size = sizeof(struct __clone_args) + 16, + .expected = -E2BIG, + .test_mode = CLONE3_ARGS_ALL_0, + }, + { + .name = "Arguments sizeof(struct clone_arg) * 2", + .flags = 0, + .size = sizeof(struct __clone_args) + 16, + .expected = -E2BIG, + .test_mode = CLONE3_ARGS_ALL_0, + }, + { + .name = "Arguments > page size", + .flags = 0, + .size_function = page_size_plus_8, + .expected = -E2BIG, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "CLONE_ARGS_SIZE_VER0 
in a new PID NS", + .flags = CLONE_NEWPID, + .size = CLONE_ARGS_SIZE_VER0, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + .filter = not_root, + }, + { + .name = "CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS", + .flags = CLONE_NEWPID, + .size = CLONE_ARGS_SIZE_VER0 - 8, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "sizeof(struct clone_args) + 8 in a new PID NS", + .flags = CLONE_NEWPID, + .size = sizeof(struct __clone_args) + 8, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + .filter = not_root, + }, + { + .name = "Arguments > page size in a new PID NS", + .flags = CLONE_NEWPID, + .size_function = page_size_plus_8, + .expected = -E2BIG, + .test_mode = CLONE3_ARGS_NO_TEST, + }, + { + .name = "New time NS", + .flags = CLONE_NEWTIME, + .size = 0, + .expected = 0, + .test_mode = CLONE3_ARGS_NO_TEST, + .filter = no_timenamespace, + }, + { + .name = "exit signal (SIGCHLD) in flags", + .flags = SIGCHLD, + .size = 0, + .expected = -EINVAL, + .test_mode = CLONE3_ARGS_NO_TEST, + }, +}; - /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. 
*/ - if (uid == 0) - test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0, - CLONE3_ARGS_NO_TEST); - else - ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); +int main(int argc, char *argv[]) +{ + size_t size; + int i; - /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */ - test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, - CLONE3_ARGS_NO_TEST); + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests)); + test_clone3_supported(); - /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ - if (uid == 0) - test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0, - CLONE3_ARGS_NO_TEST); - else - ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (tests[i].filter && tests[i].filter()) { + ksft_test_result_skip("%s\n", tests[i].name); + continue; + } - /* Do a clone3() with > page size in a new PID NS */ - test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG, - CLONE3_ARGS_NO_TEST); + if (tests[i].size_function) + size = tests[i].size_function(); + else + size = tests[i].size; - /* Do a clone3() in a new time namespace */ - test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST); + ksft_print_msg("Running test '%s'\n", tests[i].name); - /* Do a clone3() with exit signal (SIGCHLD) in flags */ - test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST); + ksft_test_result(test_clone3(tests[i].flags, size, + tests[i].expected, + tests[i].test_mode), + "%s\n", tests[i].name); + } ksft_finished(); } diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 52d3f0364bda..31b56d625655 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -27,9 +27,7 @@ #include "../kselftest_harness.h" #include "clone3_selftests.h" -#ifndef MAX_PID_NS_LEVEL #define MAX_PID_NS_LEVEL 32 -#endif static void 
child_exit(int ret) { diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index 47a8c0fc3676..54a8b2445be9 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -16,10 +16,6 @@ #include "../kselftest.h" #include "clone3_selftests.h" -#ifndef CLONE_CLEAR_SIGHAND -#define CLONE_CLEAR_SIGHAND 0x100000000ULL -#endif - static void nop_handler(int signo) { } diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index e81ffaaee02b..3d2663fe50ba 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -15,10 +15,6 @@ #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) -#ifndef CLONE_INTO_CGROUP -#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ -#endif - #ifndef __NR_clone3 #define __NR_clone3 -1 #endif @@ -32,18 +28,9 @@ struct __clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ -#endif __aligned_u64 set_tid; __aligned_u64 set_tid_size; -#ifndef CLONE_ARGS_SIZE_VER1 -#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ -#endif __aligned_u64 cgroup; -#ifndef CLONE_ARGS_SIZE_VER2 -#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ -#endif }; static pid_t sys_clone3(struct __clone_args *args, size_t size) diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 0229e9ebb995..ed785afb6077 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -23,9 +23,7 @@ #include "../kselftest.h" #include "clone3_selftests.h" -#ifndef MAX_PID_NS_LEVEL #define MAX_PID_NS_LEVEL 32 -#endif 
static int pipe_1[2]; static int pipe_2[2]; diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 749239930ca8..534576f06df1 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -16,34 +16,6 @@ #include "../kselftest_harness.h" #include "../clone3/clone3_selftests.h" -#ifndef __NR_close_range - #if defined __alpha__ - #define __NR_close_range 546 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_close_range (436 + 4000) - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_close_range (436 + 6000) - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_close_range (436 + 5000) - #endif - #elif defined __ia64__ - #define __NR_close_range (436 + 1024) - #else - #define __NR_close_range 436 - #endif -#endif - -#ifndef CLOSE_RANGE_UNSHARE -#define CLOSE_RANGE_UNSHARE (1U << 1) -#endif - -#ifndef CLOSE_RANGE_CLOEXEC -#define CLOSE_RANGE_CLOEXEC (1U << 2) -#endif - static inline int sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) { diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh index 902e312bca89..902e312bca89 100644..100755 --- a/tools/testing/selftests/damon/debugfs_attrs.sh +++ b/tools/testing/selftests/damon/debugfs_attrs.sh diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh index 4a76e37ef16b..4a76e37ef16b 100644..100755 --- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh index 87aff8083822..87aff8083822 100644..100755 --- 
a/tools/testing/selftests/damon/debugfs_empty_targets.sh +++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh index 922cadac2950..922cadac2950 100644..100755 --- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh +++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh index f3ffeb1343cf..f3ffeb1343cf 100644..100755 --- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh +++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh index 5b39ab44731c..5b39ab44731c 100644..100755 --- a/tools/testing/selftests/damon/debugfs_schemes.sh +++ b/tools/testing/selftests/damon/debugfs_schemes.sh diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh index 49aeabdb0aae..49aeabdb0aae 100644..100755 --- a/tools/testing/selftests/damon/debugfs_target_ids.sh +++ b/tools/testing/selftests/damon/debugfs_target_ids.sh diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh index 61b80197c896..61b80197c896 100644..100755 --- a/tools/testing/selftests/damon/lru_sort.sh +++ b/tools/testing/selftests/damon/lru_sort.sh diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh index 78dbc2334cbe..78dbc2334cbe 100644..100755 --- a/tools/testing/selftests/damon/reclaim.sh +++ b/tools/testing/selftests/damon/reclaim.sh diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 60a9a305aef0..56f0230a8b92 100644..100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ 
b/tools/testing/selftests/damon/sysfs.sh @@ -175,6 +175,7 @@ test_scheme() ensure_dir "$scheme_dir" "exist" ensure_file "$scheme_dir/action" "exist" "600" test_access_pattern "$scheme_dir/access_pattern" + ensure_file "$scheme_dir/apply_interval_us" "exist" "600" test_quotas "$scheme_dir/quotas" test_watermarks "$scheme_dir/watermarks" test_filters "$scheme_dir/filters" diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh index ade35576e748..ade35576e748 100644..100755 --- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh +++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh diff --git a/tools/testing/selftests/dmabuf-heaps/.gitignore b/tools/testing/selftests/dmabuf-heaps/.gitignore new file mode 100644 index 000000000000..b500e76b9045 --- /dev/null +++ b/tools/testing/selftests/dmabuf-heaps/.gitignore @@ -0,0 +1 @@ +dmabuf-heap diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh new file mode 100755 index 000000000000..fe0343b95e6c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that PCI reset works correctly by verifying that only the expected reset +# methods are supported and that after issuing the reset the ifindex of the +# port changes. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + pci_reset_test +" +NUM_NETIFS=1 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +pci_reset_test() +{ + RET=0 + + local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1) + local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2) + + if [ $bus != "pci" ]; then + check_err 1 "devlink device is not a PCI device" + log_test "pci reset" + return + fi + + if [ ! 
-f /sys/bus/pci/devices/$bdf/reset_method ]; then + check_err 1 "reset is not supported" + log_test "pci reset" + return + fi + + [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]] + check_err $? "only \"bus\" reset method should be supported" + + local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + echo 1 > /sys/bus/pci/devices/$bdf/reset + check_err $? "reset failed" + + # Wait for udev to rename newly created netdev. + udevadm settle + + local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + [[ $ifindex_pre != $ifindex_post ]] + check_err $? "reset not performed" + + log_test "pci reset" +} + +swp1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/dt/.gitignore b/tools/testing/selftests/dt/.gitignore new file mode 100644 index 000000000000..f6476c9f2884 --- /dev/null +++ b/tools/testing/selftests/dt/.gitignore @@ -0,0 +1 @@ +compatible_list diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile new file mode 100644 index 000000000000..62dc00ee4978 --- /dev/null +++ b/tools/testing/selftests/dt/Makefile @@ -0,0 +1,21 @@ +PY3 = $(shell which python3 2>/dev/null) + +ifneq ($(PY3),) + +TEST_PROGS := test_unprobed_devices.sh +TEST_GEN_FILES := compatible_list +TEST_FILES := compatible_ignore_list ktap_helpers.sh + +include ../lib.mk + +$(OUTPUT)/compatible_list: + $(top_srcdir)/scripts/dtc/dt-extract-compatibles -d $(top_srcdir) > $@ + +else + +all: no_py3_warning + +no_py3_warning: + @echo "Missing python3. This test will be skipped." 
+ +endif diff --git a/tools/testing/selftests/dt/compatible_ignore_list b/tools/testing/selftests/dt/compatible_ignore_list new file mode 100644 index 000000000000..1323903feca9 --- /dev/null +++ b/tools/testing/selftests/dt/compatible_ignore_list @@ -0,0 +1 @@ +simple-mfd diff --git a/tools/testing/selftests/dt/ktap_helpers.sh b/tools/testing/selftests/dt/ktap_helpers.sh new file mode 100644 index 000000000000..8dfae51bb4e2 --- /dev/null +++ b/tools/testing/selftests/dt/ktap_helpers.sh @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2023 Collabora Ltd +# +# Helpers for outputting in KTAP format +# +KTAP_TESTNO=1 +KTAP_CNT_PASS=0 +KTAP_CNT_FAIL=0 +KTAP_CNT_SKIP=0 + +ktap_print_header() { + echo "TAP version 13" +} + +ktap_set_plan() { + num_tests="$1" + + echo "1..$num_tests" +} + +ktap_skip_all() { + echo -n "1..0 # SKIP " + echo $@ +} + +__ktap_test() { + result="$1" + description="$2" + directive="$3" # optional + + local directive_str= + [[ ! -z "$directive" ]] && directive_str="# $directive" + + echo $result $KTAP_TESTNO $description $directive_str + + KTAP_TESTNO=$((KTAP_TESTNO+1)) +} + +ktap_test_pass() { + description="$1" + + result="ok" + __ktap_test "$result" "$description" + + KTAP_CNT_PASS=$((KTAP_CNT_PASS+1)) +} + +ktap_test_skip() { + description="$1" + + result="ok" + directive="SKIP" + __ktap_test "$result" "$description" "$directive" + + KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1)) +} + +ktap_test_fail() { + description="$1" + + result="not ok" + __ktap_test "$result" "$description" + + KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1)) +} + +ktap_print_totals() { + echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0" +} diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh new file mode 100755 index 000000000000..b07af2a4c4de --- /dev/null +++ b/tools/testing/selftests/dt/test_unprobed_devices.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# 
SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2023 Collabora Ltd +# +# Based on Frank Rowand's dt_stat script. +# +# This script tests for devices that were declared on the Devicetree and are +# expected to bind to a driver, but didn't. +# +# To achieve this, two lists are used: +# * a list of the compatibles that can be matched by a Devicetree node +# * a list of compatibles that should be ignored +# + +DIR="$(dirname $(readlink -f "$0"))" + +source "${DIR}"/ktap_helpers.sh + +PDT=/proc/device-tree/ +COMPAT_LIST="${DIR}"/compatible_list +IGNORE_LIST="${DIR}"/compatible_ignore_list + +KSFT_PASS=0 +KSFT_FAIL=1 +KSFT_SKIP=4 + +ktap_print_header + +if [[ ! -d "${PDT}" ]]; then + ktap_skip_all "${PDT} doesn't exist." + exit "${KSFT_SKIP}" +fi + +nodes_compatible=$( + for node_compat in $(find ${PDT} -name compatible); do + node=$(dirname "${node_compat}") + # Check if node is available + if [[ -e "${node}"/status ]]; then + status=$(tr -d '\000' < "${node}"/status) + [[ "${status}" != "okay" && "${status}" != "ok" ]] && continue + fi + echo "${node}" | sed -e 's|\/proc\/device-tree||' + done | sort + ) + +nodes_dev_bound=$( + IFS=$'\n' + for uevent in $(find /sys/devices -name uevent); do + if [[ -d "$(dirname "${uevent}")"/driver ]]; then + grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||' + fi + done + ) + +num_tests=$(echo ${nodes_compatible} | wc -w) +ktap_set_plan "${num_tests}" + +retval="${KSFT_PASS}" +for node in ${nodes_compatible}; do + if ! 
echo "${nodes_dev_bound}" | grep -E -q "(^| )${node}( |\$)"; then + compatibles=$(tr '\000' '\n' < "${PDT}"/"${node}"/compatible) + + for compatible in ${compatibles}; do + if grep -x -q "${compatible}" "${IGNORE_LIST}"; then + continue + fi + + if grep -x -q "${compatible}" "${COMPAT_LIST}"; then + ktap_test_fail "${node}" + retval="${KSFT_FAIL}" + continue 2 + fi + done + ktap_test_skip "${node}" + else + ktap_test_pass "${node}" + fi + +done + +ktap_print_totals +exit "${retval}" diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c index 9674a19396a3..7bc7af4eb2c1 100644 --- a/tools/testing/selftests/efivarfs/create-read.c +++ b/tools/testing/selftests/efivarfs/create-read.c @@ -32,8 +32,10 @@ int main(int argc, char **argv) rc = read(fd, buf, sizeof(buf)); if (rc != 0) { fprintf(stderr, "Reading a new var should return EOF\n"); + close(fd); return EXIT_FAILURE; } + close(fd); return EXIT_SUCCESS; } diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 67bf7254a48f..bf79d664c8e6 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -23,6 +23,9 @@ #include "../kselftest.h" +#define TESTS_EXPECTED 51 +#define TEST_NAME_LEN (PATH_MAX * 4) + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -43,71 +46,85 @@ static int execveat_(int fd, const char *path, char **argv, char **envp, static int _check_execveat_fail(int fd, const char *path, int flags, int expected_errno, const char *errno_str) { + char test_name[TEST_NAME_LEN]; int rc; errno = 0; - printf("Check failure of execveat(%d, '%s', %d) with %s... 
", - fd, path?:"(null)", flags, errno_str); + snprintf(test_name, sizeof(test_name), + "Check failure of execveat(%d, '%s', %d) with %s", + fd, path?:"(null)", flags, errno_str); rc = execveat_(fd, path, argv, envp, flags); if (rc > 0) { - printf("[FAIL] (unexpected success from execveat(2))\n"); + ksft_print_msg("unexpected success from execveat(2)\n"); + ksft_test_result_fail("%s\n", test_name); return 1; } if (errno != expected_errno) { - printf("[FAIL] (expected errno %d (%s) not %d (%s)\n", - expected_errno, strerror(expected_errno), - errno, strerror(errno)); + ksft_print_msg("expected errno %d (%s) not %d (%s)\n", + expected_errno, strerror(expected_errno), + errno, strerror(errno)); + ksft_test_result_fail("%s\n", test_name); return 1; } - printf("[OK]\n"); + ksft_test_result_pass("%s\n", test_name); return 0; } static int check_execveat_invoked_rc(int fd, const char *path, int flags, int expected_rc, int expected_rc2) { + char test_name[TEST_NAME_LEN]; int status; int rc; pid_t child; int pathlen = path ? strlen(path) : 0; if (pathlen > 40) - printf("Check success of execveat(%d, '%.20s...%s', %d)... ", - fd, path, (path + pathlen - 20), flags); + snprintf(test_name, sizeof(test_name), + "Check success of execveat(%d, '%.20s...%s', %d)... ", + fd, path, (path + pathlen - 20), flags); else - printf("Check success of execveat(%d, '%s', %d)... ", - fd, path?:"(null)", flags); + snprintf(test_name, sizeof(test_name), + "Check success of execveat(%d, '%s', %d)... ", + fd, path?:"(null)", flags); + child = fork(); if (child < 0) { - printf("[FAIL] (fork() failed)\n"); + ksft_perror("fork() failed"); + ksft_test_result_fail("%s\n", test_name); return 1; } if (child == 0) { /* Child: do execveat(). 
*/ rc = execveat_(fd, path, argv, envp, flags); - printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n", - rc, errno, strerror(errno)); + ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n", + rc, errno, strerror(errno)); + ksft_test_result_fail("%s\n", test_name); exit(1); /* should not reach here */ } /* Parent: wait for & check child's exit status. */ rc = waitpid(child, &status, 0); if (rc != child) { - printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc); + ksft_print_msg("waitpid(%d,...) returned %d\n", child, rc); + ksft_test_result_fail("%s\n", test_name); return 1; } if (!WIFEXITED(status)) { - printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n", - child, status); + ksft_print_msg("child %d did not exit cleanly, status=%08x\n", + child, status); + ksft_test_result_fail("%s\n", test_name); return 1; } if ((WEXITSTATUS(status) != expected_rc) && (WEXITSTATUS(status) != expected_rc2)) { - printf("[FAIL] (child %d exited with %d not %d nor %d)\n", - child, WEXITSTATUS(status), expected_rc, expected_rc2); + ksft_print_msg("child %d exited with %d not %d nor %d\n", + child, WEXITSTATUS(status), expected_rc, + expected_rc2); + ksft_test_result_fail("%s\n", test_name); return 1; } - printf("[OK]\n"); + ksft_test_result_pass("%s\n", test_name); return 0; } @@ -129,11 +146,9 @@ static int open_or_die(const char *filename, int flags) { int fd = open(filename, flags); - if (fd < 0) { - printf("Failed to open '%s'; " + if (fd < 0) + ksft_exit_fail_msg("Failed to open '%s'; " "check prerequisites are available\n", filename); - exit(1); - } return fd; } @@ -162,8 +177,7 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) char *cwd = getcwd(NULL, 0); if (!cwd) { - printf("Failed to getcwd(), errno=%d (%s)\n", - errno, strerror(errno)); + ksft_perror("Failed to getcwd()"); return 2; } strcpy(longpath, cwd); @@ -193,12 +207,12 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script) */ fd = 
open(longpath, O_RDONLY); if (fd > 0) { - printf("Invoke copy of '%s' via filename of length %zu:\n", - src, strlen(longpath)); + ksft_print_msg("Invoke copy of '%s' via filename of length %zu:\n", + src, strlen(longpath)); fail += check_execveat(fd, "", AT_EMPTY_PATH); } else { - printf("Failed to open length %zu filename, errno=%d (%s)\n", - strlen(longpath), errno, strerror(errno)); + ksft_print_msg("Failed to open length %zu filename, errno=%d (%s)\n", + strlen(longpath), errno, strerror(errno)); fail++; } @@ -405,28 +419,31 @@ int main(int argc, char **argv) const char *in_test = getenv("IN_TEST"); if (verbose) { - printf(" invoked with:"); + ksft_print_msg("invoked with:\n"); for (ii = 0; ii < argc; ii++) - printf(" [%d]='%s'", ii, argv[ii]); - printf("\n"); + ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]); } /* Check expected environment transferred. */ if (!in_test || strcmp(in_test, "yes") != 0) { - printf("[FAIL] (no IN_TEST=yes in env)\n"); + ksft_print_msg("no IN_TEST=yes in env\n"); return 1; } /* Use the final argument as an exit code. */ rc = atoi(argv[argc - 1]); - fflush(stdout); + exit(rc); } else { + ksft_print_header(); + ksft_set_plan(TESTS_EXPECTED); prerequisites(); if (verbose) envp[1] = "VERBOSE=1"; rc = run_tests(); if (rc > 0) printf("%d tests failed\n", rc); + ksft_finished(); } + return rc; } diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c index 4c6f0cd83c5b..04757dc7e546 100644 --- a/tools/testing/selftests/firmware/fw_namespace.c +++ b/tools/testing/selftests/firmware/fw_namespace.c @@ -17,10 +17,6 @@ #include <sys/wait.h> #include <unistd.h> -#ifndef CLONE_NEWNS -# define CLONE_NEWNS 0x00020000 -#endif - static char *fw_path = NULL; static void die(char *fmt, ...) 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc index ff7499eb98d6..e21c9c27ece4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc @@ -28,13 +28,21 @@ s390*) mips*) ARG1=%r4 ;; +loongarch*) + ARG1=%r4 +;; +riscv*) + ARG1=%a0 +;; *) echo "Please implement other architecture here" exit_untested esac : "Test get argument (1)" -if grep -q eventfs_add_dir available_filter_functions; then +if grep -q eventfs_create_dir available_filter_functions; then + DIR_NAME="eventfs_create_dir" +elif grep -q eventfs_add_dir available_filter_functions; then DIR_NAME="eventfs_add_dir" else DIR_NAME="tracefs_create_dir" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a202b2ea4baf..93217d459556 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -31,13 +31,18 @@ mips*) loongarch*) ARG1=%r4 ;; +riscv*) + ARG1=%a0 +;; *) echo "Please implement other architecture here" exit_untested esac : "Test get argument (1)" -if grep -q eventfs_add_dir available_filter_functions; then +if grep -q eventfs_create_dir available_filter_functions; then + DIR_NAME="eventfs_create_dir" +elif grep -q eventfs_add_dir available_filter_functions; then DIR_NAME="eventfs_add_dir" else DIR_NAME="tracefs_create_dir" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 1df61e13a812..8f1292ad80ff 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -44,6 +44,10 @@ loongarch*) GOODREG=%r4 BADREG=%r12 ;; +riscv*) + 
GOODREG=%a0 + BADREG=%a8 +;; *) echo "Please implement other architecture here" exit_untested diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc new file mode 100644 index 000000000000..bc9514428dba --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc @@ -0,0 +1,13 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test failure of registering kprobe on non unique symbol +# requires: kprobe_events + +SYMBOL='name_show' + +# We skip this test on kernel where SYMBOL is unique or does not exist. +if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then + exit_unsupported +fi + +! echo "p:test_non_unique ${SYMBOL}" > kprobe_events diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 2e986cbf1a46..2b5ea18bde38 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -21,7 +21,9 @@ CXX ?= $(CROSS_COMPILE)g++ HOSTPKG_CONFIG := pkg-config -CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(KHDR_INCLUDES) -I$(OUTPUT) +CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(OUTPUT) +CFLAGS += -I$(OUTPUT)/tools/include + LDLIBS += -lelf -lz -lrt -lpthread # Silence some warnings when compiled with clang @@ -65,7 +67,6 @@ BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build INCLUDE_DIR := $(SCRATCH_DIR)/include -KHDR_INCLUDES := $(SCRATCH_DIR)/uapi/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a ifneq ($(CROSS_COMPILE),) HOST_BUILD_DIR := $(BUILD_DIR)/host @@ -151,9 +152,6 @@ else $(Q)cp "$(VMLINUX_H)" $@ endif -$(KHDR_INCLUDES)/linux/hid.h: $(top_srcdir)/include/uapi/linux/hid.h - $(MAKE) -C $(top_srcdir) INSTALL_HDR_PATH=$(SCRATCH_DIR)/uapi headers_install - $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ $(TOOLSDIR)/bpf/resolve_btfids/main.c \ 
$(TOOLSDIR)/lib/rbtree.c \ @@ -231,7 +229,7 @@ $(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT) $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@ -$(OUTPUT)/%.o: %.c $(BPF_SKELS) $(KHDR_INCLUDES)/linux/hid.h +$(OUTPUT)/%.o: %.c $(BPF_SKELS) $(call msg,CC,,$@) $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index 88c593f753b5..1e558826b809 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -1,8 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Red hat */ -#include "vmlinux.h" -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include "hid_bpf_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 4fff31dbe0e7..65e657ac1198 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -5,6 +5,83 @@ #ifndef __HID_BPF_HELPERS_H #define __HID_BPF_HELPERS_H +/* "undefine" structs and enums in vmlinux.h, because we "override" them below */ +#define hid_bpf_ctx hid_bpf_ctx___not_used +#define hid_report_type hid_report_type___not_used +#define hid_class_request hid_class_request___not_used +#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used +#define HID_INPUT_REPORT HID_INPUT_REPORT___not_used +#define HID_OUTPUT_REPORT HID_OUTPUT_REPORT___not_used +#define HID_FEATURE_REPORT HID_FEATURE_REPORT___not_used +#define HID_REPORT_TYPES HID_REPORT_TYPES___not_used +#define HID_REQ_GET_REPORT HID_REQ_GET_REPORT___not_used +#define HID_REQ_GET_IDLE HID_REQ_GET_IDLE___not_used +#define HID_REQ_GET_PROTOCOL HID_REQ_GET_PROTOCOL___not_used +#define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used +#define HID_REQ_SET_IDLE 
HID_REQ_SET_IDLE___not_used +#define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used +#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used +#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used +#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used + +#include "vmlinux.h" + +#undef hid_bpf_ctx +#undef hid_report_type +#undef hid_class_request +#undef hid_bpf_attach_flags +#undef HID_INPUT_REPORT +#undef HID_OUTPUT_REPORT +#undef HID_FEATURE_REPORT +#undef HID_REPORT_TYPES +#undef HID_REQ_GET_REPORT +#undef HID_REQ_GET_IDLE +#undef HID_REQ_GET_PROTOCOL +#undef HID_REQ_SET_REPORT +#undef HID_REQ_SET_IDLE +#undef HID_REQ_SET_PROTOCOL +#undef HID_BPF_FLAG_NONE +#undef HID_BPF_FLAG_INSERT_HEAD +#undef HID_BPF_FLAG_MAX + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <linux/const.h> + +enum hid_report_type { + HID_INPUT_REPORT = 0, + HID_OUTPUT_REPORT = 1, + HID_FEATURE_REPORT = 2, + + HID_REPORT_TYPES, +}; + +struct hid_bpf_ctx { + __u32 index; + const struct hid_device *hid; + __u32 allocated_size; + enum hid_report_type report_type; + union { + __s32 retval; + __s32 size; + }; +} __attribute__((preserve_access_index)); + +enum hid_class_request { + HID_REQ_GET_REPORT = 0x01, + HID_REQ_GET_IDLE = 0x02, + HID_REQ_GET_PROTOCOL = 0x03, + HID_REQ_SET_REPORT = 0x09, + HID_REQ_SET_IDLE = 0x0A, + HID_REQ_SET_PROTOCOL = 0x0B, +}; + +enum hid_bpf_attach_flags { + HID_BPF_FLAG_NONE = 0, + HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), + HID_BPF_FLAG_MAX, +}; + /* following are kfuncs exported by HID for HID-BPF */ extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 33d08600be13..6ed328c863c4 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -86,12 +86,13 @@ TEST_F(iommufd, cmd_fail) TEST_F(iommufd, cmd_length) { -#define TEST_LENGTH(_struct, _ioctl) \ +#define 
TEST_LENGTH(_struct, _ioctl, _last) \ { \ + size_t min_size = offsetofend(struct _struct, _last); \ struct { \ struct _struct cmd; \ uint8_t extra; \ - } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \ + } cmd = { .cmd = { .size = min_size - 1 }, \ .extra = UINT8_MAX }; \ int old_errno; \ int rc; \ @@ -112,16 +113,19 @@ TEST_F(iommufd, cmd_length) } \ } - TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); - TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO); - TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); - TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); - TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); - TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP); - TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY); - TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP); - TEST_LENGTH(iommu_option, IOMMU_OPTION); - TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS); + TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id); + TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved); + TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved); + TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id); + TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES, + out_iova_alignment); + TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS, + allowed_iovas); + TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova); + TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova); + TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length); + TEST_LENGTH(iommu_option, IOMMU_OPTION, val64); + TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved); #undef TEST_LENGTH } @@ -260,6 +264,121 @@ TEST_F(iommufd_ioas, ioas_destroy) } } +TEST_F(iommufd_ioas, alloc_hwpt_nested) +{ + const uint32_t min_data_len = + offsetofend(struct iommu_hwpt_selftest, iotlb); + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t nested_hwpt_id[2] = {}; + uint32_t parent_hwpt_id = 0; + uint32_t parent_hwpt_id_not_work = 0; + uint32_t test_hwpt_id = 0; + + if 
(self->device_id) { + /* Negative tests */ + test_err_hwpt_alloc(ENOENT, self->ioas_id, self->device_id, 0, + &test_hwpt_id); + test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0, + &test_hwpt_id); + + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &parent_hwpt_id); + + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0, + &parent_hwpt_id_not_work); + + /* Negative nested tests */ + test_err_hwpt_alloc_nested(EINVAL, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_NONE, &data, + sizeof(data)); + test_err_hwpt_alloc_nested(EOPNOTSUPP, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST + 1, &data, + sizeof(data)); + test_err_hwpt_alloc_nested(EINVAL, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, &data, + min_data_len - 1); + test_err_hwpt_alloc_nested(EFAULT, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, NULL, + sizeof(data)); + test_err_hwpt_alloc_nested( + EOPNOTSUPP, self->device_id, parent_hwpt_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_nested(EINVAL, self->device_id, + parent_hwpt_id_not_work, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + /* Allocate two nested hwpts sharing one common parent hwpt */ + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0, + &nested_hwpt_id[1], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + /* Negative test: a nested hwpt on top of a nested hwpt */ + test_err_hwpt_alloc_nested(EINVAL, self->device_id, + nested_hwpt_id[0], 0, &test_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + /* Negative test: parent hwpt now cannot be freed */ + 
EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, parent_hwpt_id)); + + /* Attach device to nested_hwpt_id[0] that then will be busy */ + test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, nested_hwpt_id[0])); + + /* Switch from nested_hwpt_id[0] to nested_hwpt_id[1] */ + test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[1]); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, nested_hwpt_id[1])); + test_ioctl_destroy(nested_hwpt_id[0]); + + /* Detach from nested_hwpt_id[1] and destroy it */ + test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id); + test_ioctl_destroy(nested_hwpt_id[1]); + + /* Detach from the parent hw_pagetable and destroy it */ + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(parent_hwpt_id); + test_ioctl_destroy(parent_hwpt_id_not_work); + } else { + test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0, + &parent_hwpt_id); + test_err_hwpt_alloc_nested(ENOENT, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[0], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_err_hwpt_alloc_nested(ENOENT, self->device_id, + parent_hwpt_id, 0, + &nested_hwpt_id[1], + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + test_err_mock_domain_replace(ENOENT, self->stdev_id, + nested_hwpt_id[0]); + test_err_mock_domain_replace(ENOENT, self->stdev_id, + nested_hwpt_id[1]); + } +} + TEST_F(iommufd_ioas, hwpt_attach) { /* Create a device attached directly to a hwpt */ @@ -1404,16 +1523,242 @@ TEST_F(iommufd_mock_domain, alloc_hwpt) int i; for (i = 0; i != variant->mock_domains; i++) { + uint32_t hwpt_id[2]; uint32_t stddev_id; - uint32_t hwpt_id; - test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_err_hwpt_alloc(EOPNOTSUPP, + self->idev_ids[i], self->ioas_id, + ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]); + test_cmd_hwpt_alloc(self->idev_ids[i], 
self->ioas_id, + 0, &hwpt_id[0]); + test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]); + + /* Do a hw_pagetable rotation test */ + test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0])); + test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]); + EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1])); + test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id); + test_ioctl_destroy(hwpt_id[1]); + + test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL); test_ioctl_destroy(stddev_id); - test_ioctl_destroy(hwpt_id); + test_ioctl_destroy(hwpt_id[0]); } } +FIXTURE(iommufd_dirty_tracking) +{ + int fd; + uint32_t ioas_id; + uint32_t hwpt_id; + uint32_t stdev_id; + uint32_t idev_id; + unsigned long page_size; + unsigned long bitmap_size; + void *bitmap; + void *buffer; +}; + +FIXTURE_VARIANT(iommufd_dirty_tracking) +{ + unsigned long buffer_size; +}; + +FIXTURE_SETUP(iommufd_dirty_tracking) +{ + void *vrc; + int rc; + + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + variant->buffer_size, rc); + } + + assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); + vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + assert(vrc == self->buffer); + + self->page_size = MOCK_PAGE_SIZE; + self->bitmap_size = + variant->buffer_size / self->page_size / BITS_PER_BYTE; + + /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */ + rc = posix_memalign(&self->bitmap, PAGE_SIZE, + self->bitmap_size + MOCK_PAGE_SIZE); + assert(!rc); + assert(self->bitmap); + assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); + + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, 
&self->stdev_id, &self->hwpt_id, + &self->idev_id); +} + +FIXTURE_TEARDOWN(iommufd_dirty_tracking) +{ + munmap(self->buffer, variant->buffer_size); + munmap(self->bitmap, self->bitmap_size); + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) +{ + /* one u32 index bitmap */ + .buffer_size = 128UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) +{ + /* one u64 index bitmap */ + .buffer_size = 256UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) +{ + /* two u64 index and trailing end bitmap */ + .buffer_size = 640UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) +{ + /* 4K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) +{ + /* 8K bitmap (256M IOVA range) */ + .buffer_size = 256UL * 1024UL * 1024UL, +}; + +TEST_F(iommufd_dirty_tracking, enforce_dirty) +{ + uint32_t ioas_id, stddev_id, idev_id; + uint32_t hwpt_id, _hwpt_id; + uint32_t dev_flags; + + /* Regular case */ + dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY; + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id, + NULL); + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); + + /* IOMMU device does not support dirty tracking */ + test_ioctl_ioas_alloc(&ioas_id); + test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id, + &idev_id); + test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_ioctl_destroy(stddev_id); +} + +TEST_F(iommufd_dirty_tracking, set_dirty_tracking) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, 
&stddev_id, NULL, NULL); + test_cmd_set_dirty_tracking(hwpt_id, true); + test_cmd_set_dirty_tracking(hwpt_id, false); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, device_dirty_capability) +{ + uint32_t caps = 0; + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_cmd_get_hw_capabilities(self->idev_id, caps, + IOMMU_HW_CAP_DIRTY_TRACKING); + ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING, + caps & IOMMU_HW_CAP_DIRTY_TRACKING); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, + variant->buffer_size, MOCK_APERTURE_START); + + test_cmd_hwpt_alloc(self->idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + + test_cmd_set_dirty_tracking(hwpt_id, true); + + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap, self->bitmap_size, 0, _metadata); + + /* PAGE_SIZE unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + MOCK_PAGE_SIZE, + self->bitmap_size, 0, _metadata); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + +TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) +{ + uint32_t stddev_id; + uint32_t hwpt_id; + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, + variant->buffer_size, MOCK_APERTURE_START); + + test_cmd_hwpt_alloc(self->idev_id, ioas_id, + IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + + test_cmd_set_dirty_tracking(hwpt_id, 
true); + + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap, self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + + /* Unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, + self->bitmap + MOCK_PAGE_SIZE, + self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); +} + /* VFIO compatibility IOCTLs */ TEST_F(iommufd, simple_ioctls) @@ -1729,7 +2074,7 @@ TEST_F(vfio_compat_mock_domain, map) ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); - /* UNMAP_FLAG_ALL requres 0 iova/size */ + /* UNMAP_FLAG_ALL requires 0 iova/size */ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL; EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index a220ca2a689d..f590417cd67a 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -105,7 +105,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata, /* * This is just an arbitrary limit based on the current kernel - * situation. Changes in the kernel can dramtically change the number of + * situation. Changes in the kernel can dramatically change the number of * required fault injection sites, so if this hits it doesn't * necessarily mean a test failure, just that the limit has to be made * bigger. 
@@ -612,10 +612,11 @@ TEST_FAIL_NTH(basic_fail_nth, device) &idev_id)) return -1; - if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info))) + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL)) return -1; - if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id)) + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) return -1; if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index e0753d03ecaa..050e9751321c 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -16,6 +16,25 @@ /* Hack to make assertions more readable */ #define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD +/* Imported from include/asm-generic/bitops/generic-non-atomic.h */ +#define BITS_PER_BYTE 8 +#define BITS_PER_LONG __BITS_PER_LONG +#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) + +static inline void set_bit(unsigned int nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p |= mask; +} + +static inline bool test_bit(unsigned int nr, unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1))); +} + static void *buffer; static unsigned long BUFFER_SIZE; @@ -74,6 +93,38 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ stdev_id, hwpt_id, NULL)) +static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id, + __u32 stdev_flags, __u32 *stdev_id, + __u32 *hwpt_id, __u32 *idev_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, + .id = ioas_id, + .mock_domain_flags = { .dev_flags = stdev_flags }, + }; + int ret; + + ret = ioctl(fd, 
IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (stdev_id) + *stdev_id = cmd.mock_domain_flags.out_stdev_id; + assert(cmd.id != 0); + if (hwpt_id) + *hwpt_id = cmd.mock_domain_flags.out_hwpt_id; + if (idev_id) + *idev_id = cmd.mock_domain_flags.out_idev_id; + return 0; +} +#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \ + stdev_id, hwpt_id, idev_id)) +#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \ + stdev_id, hwpt_id, NULL)) + static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, __u32 *hwpt_id) { @@ -103,12 +154,17 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, pt_id, NULL)) static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, - __u32 *hwpt_id) + __u32 flags, __u32 *hwpt_id, __u32 data_type, + void *data, size_t data_len) { struct iommu_hwpt_alloc cmd = { .size = sizeof(cmd), + .flags = flags, .dev_id = device_id, .pt_id = pt_id, + .data_type = data_type, + .data_len = data_len, + .data_uptr = (uint64_t)data, }; int ret; @@ -120,8 +176,24 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, return 0; } -#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \ - ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id)) +#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \ + 0)) +#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \ + self->fd, device_id, pt_id, flags, \ + hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0)) + +#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \ + data_type, data, data_len) \ + ASSERT_EQ(0, 
_test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + hwpt_id, data_type, data, data_len)) +#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \ + data_type, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ + hwpt_id, data_type, data, data_len)) static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) @@ -142,6 +214,126 @@ static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, #define test_cmd_access_replace_ioas(access_id, ioas_id) \ ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id)) +static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled) +{ + struct iommu_hwpt_set_dirty_tracking cmd = { + .size = sizeof(cmd), + .flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0, + .hwpt_id = hwpt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd); + if (ret) + return -errno; + return 0; +} +#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \ + ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled)) + +static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, + __u64 *bitmap, __u32 flags) +{ + struct iommu_hwpt_get_dirty_bitmap cmd = { + .size = sizeof(cmd), + .hwpt_id = hwpt_id, + .flags = flags, + .iova = iova, + .length = length, + .page_size = page_size, + .data = (uintptr_t)bitmap, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd); + if (ret) + return ret; + return 0; +} + +#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, \ + bitmap, flags) \ + ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \ + page_size, bitmap, flags)) + +static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, + __u64 *bitmap, __u64 *dirty) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = 
IOMMU_TEST_OP_DIRTY, + .id = hwpt_id, + .dirty = { + .iova = iova, + .length = length, + .page_size = page_size, + .uptr = (uintptr_t)bitmap, + } + }; + int ret; + + ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd); + if (ret) + return -ret; + if (dirty) + *dirty = cmd.dirty.out_nr_dirty; + return 0; +} + +#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \ + bitmap, nr) \ + ASSERT_EQ(0, \ + _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \ + page_size, bitmap, nr)) + +static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, + __u64 iova, size_t page_size, __u64 *bitmap, + __u64 bitmap_size, __u32 flags, + struct __test_metadata *_metadata) +{ + unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE; + unsigned long nr = nbits / 2; + __u64 out_dirty = 0; + + /* Mark all even bits as dirty in the mock domain */ + for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + if (!(i % 2)) + set_bit(i, (unsigned long *)bitmap); + ASSERT_EQ(nr, count); + + test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, + bitmap, &out_dirty); + ASSERT_EQ(nr, out_dirty); + + /* Expect all even bits as dirty in the user bitmap */ + memset(bitmap, 0, bitmap_size); + test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, + flags); + for (count = 0, i = 0; i < nbits; count += !(i % 2), i++) + ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); + ASSERT_EQ(count, out_dirty); + + memset(bitmap, 0, bitmap_size); + test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, + flags); + + /* It was read already -- expect all zeroes unless NO_CLEAR was requested */ + for (i = 0; i < nbits; i++) { + ASSERT_EQ(!(i % 2) && (flags & + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), + test_bit(i, (unsigned long *)bitmap)); + } + + return 0; +} +#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \ + bitmap_size, flags, _metadata) \ + ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, 
length, iova, \ + page_size, bitmap, bitmap_size, \ + flags, _metadata)) + static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) { @@ -266,6 +458,17 @@ static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer, IOMMU_IOAS_MAP_READABLE)); \ }) +#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova) \ + ({ \ + __u64 __iova = iova; \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map( \ + self->fd, ioas_id, buffer, length, &__iova, \ + IOMMU_IOAS_MAP_FIXED_IOVA | \ + IOMMU_IOAS_MAP_WRITEABLE | \ + IOMMU_IOAS_MAP_READABLE)); \ + }) + #define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \ ({ \ __u64 __iova = iova; \ @@ -354,8 +557,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) #endif /* @data can be NULL */ -static int _test_cmd_get_hw_info(int fd, __u32 device_id, - void *data, size_t data_len) +static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data, + size_t data_len, uint32_t *capabilities) { struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; struct iommu_hw_info cmd = { @@ -363,6 +566,7 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, .dev_id = device_id, .data_len = data_len, .data_uptr = (uint64_t)data, + .out_capabilities = 0, }; int ret; @@ -399,14 +603,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, assert(!info->flags); } + if (capabilities) + *capabilities = cmd.out_capabilities; + return 0; } -#define test_cmd_get_hw_info(device_id, data, data_len) \ - ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ - data, data_len)) +#define test_cmd_get_hw_info(device_id, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \ + data_len, NULL)) + +#define test_err_get_hw_info(_errno, device_id, data, data_len) \ + EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \ + data_len, NULL)) -#define test_err_get_hw_info(_errno, device_id, data, 
data_len) \ - EXPECT_ERRNO(_errno, \ - _test_cmd_get_hw_info(self->fd, device_id, \ - data, data_len)) +#define test_cmd_get_hw_capabilities(device_id, caps, mask) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps)) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 529d29a35900..a781e6311810 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -48,6 +48,7 @@ #include <stdlib.h> #include <unistd.h> #include <stdarg.h> +#include <string.h> #include <stdio.h> #endif @@ -77,6 +78,8 @@ #define KSFT_XPASS 3 #define KSFT_SKIP 4 +#define __printf(a, b) __attribute__((format(printf, a, b))) + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -129,7 +132,7 @@ static inline void ksft_print_header(void) static inline void ksft_set_plan(unsigned int plan) { ksft_plan = plan; - printf("1..%d\n", ksft_plan); + printf("1..%u\n", ksft_plan); } static inline void ksft_print_cnts(void) @@ -137,13 +140,13 @@ static inline void ksft_print_cnts(void) if (ksft_plan != ksft_test_num()) printf("# Planned tests != run tests (%u != %u)\n", ksft_plan, ksft_test_num()); - printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n", + printf("# Totals: pass:%u fail:%u xfail:%u xpass:%u skip:%u error:%u\n", ksft_cnt.ksft_pass, ksft_cnt.ksft_fail, ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass, ksft_cnt.ksft_xskip, ksft_cnt.ksft_error); } -static inline void ksft_print_msg(const char *msg, ...) +static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -155,7 +158,20 @@ static inline void ksft_print_msg(const char *msg, ...) va_end(args); } -static inline void ksft_test_result_pass(const char *msg, ...) 
+static inline void ksft_perror(const char *msg) +{ +#ifndef NOLIBC + ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno); +#else + /* + * nolibc doesn't provide strerror() and it seems + * inappropriate to add one, just print the errno. + */ + ksft_print_msg("%s: %d)\n", msg, errno); +#endif +} + +static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -163,13 +179,13 @@ static inline void ksft_test_result_pass(const char *msg, ...) ksft_cnt.ksft_pass++; va_start(args, msg); - printf("ok %d ", ksft_test_num()); + printf("ok %u ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); } -static inline void ksft_test_result_fail(const char *msg, ...) +static inline __printf(1, 2) void ksft_test_result_fail(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -177,7 +193,7 @@ static inline void ksft_test_result_fail(const char *msg, ...) ksft_cnt.ksft_fail++; va_start(args, msg); - printf("not ok %d ", ksft_test_num()); + printf("not ok %u ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); @@ -195,7 +211,7 @@ static inline void ksft_test_result_fail(const char *msg, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__);\ } while (0) -static inline void ksft_test_result_xfail(const char *msg, ...) +static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -203,13 +219,13 @@ static inline void ksft_test_result_xfail(const char *msg, ...) ksft_cnt.ksft_xfail++; va_start(args, msg); - printf("ok %d # XFAIL ", ksft_test_num()); + printf("ok %u # XFAIL ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); } -static inline void ksft_test_result_skip(const char *msg, ...) +static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...) 
{ int saved_errno = errno; va_list args; @@ -217,14 +233,14 @@ static inline void ksft_test_result_skip(const char *msg, ...) ksft_cnt.ksft_xskip++; va_start(args, msg); - printf("ok %d # SKIP ", ksft_test_num()); + printf("ok %u # SKIP ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); } /* TODO: how does "error" differ from "fail" or "skip"? */ -static inline void ksft_test_result_error(const char *msg, ...) +static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -232,7 +248,7 @@ static inline void ksft_test_result_error(const char *msg, ...) ksft_cnt.ksft_error++; va_start(args, msg); - printf("not ok %d # error ", ksft_test_num()); + printf("not ok %u # error ", ksft_test_num()); errno = saved_errno; vprintf(msg, args); va_end(args); @@ -271,7 +287,7 @@ static inline int ksft_exit_fail(void) ksft_cnt.ksft_xfail + \ ksft_cnt.ksft_xskip) -static inline int ksft_exit_fail_msg(const char *msg, ...) +static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -298,7 +314,7 @@ static inline int ksft_exit_xpass(void) exit(KSFT_XPASS); } -static inline int ksft_exit_skip(const char *msg, ...) +static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...) 
{ int saved_errno = errno; va_list args; diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index a3bb36fb3cfc..a5963ab9215b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -17,6 +17,15 @@ else ARCH_DIR := $(ARCH) endif +ifeq ($(ARCH),arm64) +arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/ +GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/ +CFLAGS += -I$(GEN_HDRS) + +$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*) + $(MAKE) -C $(arm64_tools_dir) +endif + LIBKVM += lib/assert.c LIBKVM += lib/elf.c LIBKVM += lib/guest_modes.c @@ -66,6 +75,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test +TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs @@ -145,10 +155,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test +TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test @@ -256,13 +268,18 @@ $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c $(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS) $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.* +EXTRA_CLEAN += 
$(GEN_HDRS) \ + $(LIBKVM_OBJS) \ + $(SPLIT_TESTS_OBJS) \ + $(TEST_DEP_FILES) \ + $(TEST_GEN_OBJ) \ + cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) -$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS) $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS) $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ # Compile the string overrides as freestanding to prevent the compiler from @@ -272,8 +289,10 @@ $(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) +$(SPLIT_TESTS_OBJS): $(GEN_HDRS) $(TEST_GEN_PROGS): $(LIBKVM_OBJS) $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) +$(TEST_GEN_OBJ): $(GEN_HDRS) cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. cscope: diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index b90580840b22..8e5bd07a3727 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -146,8 +146,8 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); - el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val); - return el0 == ID_AA64PFR0_ELx_64BIT_ONLY; + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY; } int main(void) diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index f5b6cb3a0019..866002917441 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -116,12 +116,12 @@ static void reset_debug_state(void) /* Reset all bcr/bvr/wcr/wvr registers */ 
dfr0 = read_sysreg(id_aa64dfr0_el1); - brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0); + brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0); for (i = 0; i <= brps; i++) { write_dbgbcr(i, 0); write_dbgbvr(i, 0); } - wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0); + wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0); for (i = 0; i <= wrps; i++) { write_dbgwcr(i, 0); write_dbgwvr(i, 0); @@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt) static int debug_version(uint64_t id_aa64dfr0) { - return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0); + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0); } static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn) @@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0) int b, w, c; /* Number of breakpoints */ - brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1; + brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1; __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required"); /* Number of watchpoints */ - wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1; + wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1; /* Number of context aware breakpoints */ - ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1; + ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1; pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__, brp_num, wrp_num, ctx_brp_num); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 47bb914ab2fa..eb4217b7c768 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -96,14 +96,14 @@ static bool guest_check_lse(void) uint64_t isar0 = 
read_sysreg(id_aa64isar0_el1); uint64_t atomic; - atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0); + atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0); return atomic >= 2; } static bool guest_check_dc_zva(void) { uint64_t dczid = read_sysreg(dczid_el0); - uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid); + uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid); return dzp == 0; } @@ -135,8 +135,8 @@ static void guest_at(void) uint64_t par; asm volatile("at s1e1r, %0" :: "r" (guest_test_memory)); - par = read_sysreg(par_el1); isb(); + par = read_sysreg(par_el1); /* Bit 1 indicates whether the AT was successful */ GUEST_ASSERT_EQ(par & 1, 0); @@ -196,7 +196,7 @@ static bool guest_set_ha(void) uint64_t hadbs, tcr; /* Skip if HA is not supported. */ - hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1); + hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1); if (hadbs == 0) return false; @@ -842,6 +842,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syndrome, _access), \ .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ .expected_events = { .fail_vcpu_runs = 1 }, \ @@ -865,6 +866,7 @@ static void help(char *name) .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \ + .guest_prepare = { _PREPARE(_access) }, \ .guest_test = _access, \ .guest_test_check = { _test_check }, \ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \ @@ -894,6 +896,7 @@ static void help(char *name) .data_memslot_flags = KVM_MEM_READONLY, \ .pt_memslot_flags = KVM_MEM_READONLY, \ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \ + .guest_prepare = { _PREPARE(_access) }, \ 
.guest_test = _access, \ .uffd_data_handler = _uffd_data_handler, \ .uffd_pt_handler = uffd_pt_handler, \ diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c new file mode 100644 index 000000000000..bac05210b539 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * set_id_regs - Test for setting ID register from userspace. + * + * Copyright (c) 2023 Google LLC. + * + * + * Test that KVM supports setting ID registers from userspace and handles the + * feature set correctly. + */ + +#include <stdint.h> +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" +#include <linux/bitfield.h> + +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE, /* Bigger value is safe */ + FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */ + FTR_END, /* Mark the last ftr bits */ +}; + +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + +struct reg_ftr_bits { + char *name; + bool sign; + enum ftr_type type; + uint8_t shift; + uint64_t mask; + int64_t safe_val; +}; + +struct test_feature_reg { + uint32_t reg; + const struct reg_ftr_bits *ftr_bits; +}; + +#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \ + { \ + .name = #NAME, \ + .sign = SIGNED, \ + .type = TYPE, \ + .shift = SHIFT, \ + .mask = MASK, \ + .safe_val = SAFE_VAL, \ + } + +#define REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define S_REG_FTR_BITS(type, reg, field, safe_val) \ + __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \ + reg##_##field##_MASK, safe_val) + +#define REG_FTR_END \ + { \ + .type = FTR_END, \ + } + +static const struct 
reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0), + 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0), + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0), + REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0), + 
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0), + REG_FTR_END, +}; + +static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0), + REG_FTR_END, +}; + +#define TEST_REG(id, table) \ + { \ + .reg = id, \ + .ftr_bits = &((table)[0]), \ + } + +static struct test_feature_reg test_regs[] = { + TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1), + TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1), + TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1), + TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), + TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), + TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), + 
TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), + TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), +}; + +#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0); + +static void guest_code(void) +{ + GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_DFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1); + GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1); + GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1); + + GUEST_DONE(); +} + +/* Return a safe value for the field described by ftr_bits (signedness from ->sign, policy from ->type), given its current value ftr */ +uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > 0) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == ftr_max) + ftr = 0; + else if (ftr != 0) + ftr++; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = ftr_bits->safe_val; + break; + case FTR_LOWER_SAFE: + if (ftr > 0) + ftr--; + break; + case FTR_HIGHER_SAFE: + if (ftr < ftr_max - 1) + ftr++; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr != 0 && ftr != ftr_max - 1) + ftr++; + break; + default: + break; + } + } + + return ftr; +} + +/* Return an invalid value for the field described by ftr_bits (signedness from ->sign, policy from ->type), given its current value ftr */ +uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) +{ + uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + if (ftr_bits->sign == FTR_UNSIGNED) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr
+ 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max; + else + ftr--; + break; + default: + break; + } + } else if (ftr != ftr_max) { + switch (ftr_bits->type) { + case FTR_EXACT: + ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1); + break; + case FTR_LOWER_SAFE: + ftr++; + break; + case FTR_HIGHER_SAFE: + ftr--; + break; + case FTR_HIGHER_OR_ZERO_SAFE: + if (ftr == 0) + ftr = ftr_max - 1; + else + ftr--; + break; + default: + break; + } + } else { + ftr = 0; + } + + return ftr; +} + +static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, new_val, ftr; + + vcpu_get_reg(vcpu, reg, &val); + ftr = (val & mask) >> shift; + + ftr = get_safe_value(ftr_bits, ftr); + + ftr <<= shift; + val &= ~mask; + val |= ftr; + + vcpu_set_reg(vcpu, reg, val); + vcpu_get_reg(vcpu, reg, &new_val); + TEST_ASSERT_EQ(new_val, val); +} + +static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) +{ + uint8_t shift = ftr_bits->shift; + uint64_t mask = ftr_bits->mask; + uint64_t val, old_val, ftr; + int r; + + vcpu_get_reg(vcpu, reg, &val); + ftr = (val & mask) >> shift; + + ftr = get_invalid_value(ftr_bits, ftr); + + old_val = val; + ftr <<= shift; + val &= ~mask; + val |= ftr; + + r = __vcpu_set_reg(vcpu, reg, val); + TEST_ASSERT(r < 0 && errno == EINVAL, + "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); + + vcpu_get_reg(vcpu, reg, &val); + TEST_ASSERT_EQ(val, old_val); +} + +static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + int ret; + + /* KVM should return error when reserved field is not zero */ + range.reserved[0] = 1; + ret = 
__vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + TEST_ASSERT(ret, "KVM doesn't check invalid parameters."); + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) { + const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits; + uint32_t reg_id = test_regs[i].reg; + uint64_t reg = KVM_ARM64_SYS_REG(reg_id); + int idx; + + /* Get the index to masks array for the idreg */ + idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id), + sys_reg_CRn(reg_id), sys_reg_CRm(reg_id), + sys_reg_Op2(reg_id)); + + for (int j = 0; ftr_bits[j].type != FTR_END; j++) { + /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ + if (aarch64_only && sys_reg_CRm(reg_id) < 4) { + ksft_test_result_skip("%s on AARCH64 only system\n", + ftr_bits[j].name); + continue; + } + + /* Make sure the feature field is writable */ + TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); + + test_reg_set_fail(vcpu, reg, &ftr_bits[j]); + test_reg_set_success(vcpu, reg, &ftr_bits[j]); + + ksft_test_result_pass("%s\n", ftr_bits[j].name); + } + } +} + +static void test_guest_reg_read(struct kvm_vcpu *vcpu) +{ + bool done = false; + struct ucall uc; + uint64_t val; + + while (!done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_SYNC: + /* Make sure the written values are seen by guest */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val); + TEST_ASSERT_EQ(val, uc.args[3]); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + } +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + bool aarch64_only; + uint64_t val, el0; + int ftr_cnt; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); + + vm = 
vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Check for AARCH64 only system */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val); + aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY); + + ksft_print_header(); + + ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs); + + ksft_set_plan(ftr_cnt); + + test_user_set_reg(vcpu, aarch64_only); + test_guest_reg_read(vcpu); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c new file mode 100644 index 000000000000..5ea78986e665 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vpmu_counter_access - Test vPMU event counter access + * + * Copyright (c) 2023 Google LLC. + * + * This test checks if the guest can see the same number of the PMU event + * counters (PMCR_EL0.N) that userspace sets, if the guest can access + * those counters, and if the guest is prevented from accessing any + * other counters. + * It also checks if the userspace accesses to the PMU registers honor the + * PMCR.N value that's set for the guest. + * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */ +#include <kvm_util.h> +#include <processor.h> +#include <test_util.h> +#include <vgic.h> +#include <perf/arm_pmuv3.h> +#include <linux/bitfield.h> + +/* The max number of the PMU event counters (excluding the cycle counter) */ +#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1) + +/* The cycle counter bit position that's common among the PMU registers */ +#define ARMV8_PMU_CYCLE_IDX 31 + +struct vpmu_vm { + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + int gic_fd; +}; + +static struct vpmu_vm vpmu_vm; + +struct pmreg_sets { + uint64_t set_reg_id; + uint64_t clr_reg_id; +}; + +#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr} + +static uint64_t get_pmcr_n(uint64_t pmcr) +{ + return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; +} + +static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) +{ + *pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); + *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT); +} + +static uint64_t get_counters_mask(uint64_t n) +{ + uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX); + + if (n) + mask |= GENMASK(n - 1, 0); + return mask; +} + +/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */ +static inline unsigned long read_sel_evcntr(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevcntr_el0); +} + +/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */ +static inline void write_sel_evcntr(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevcntr_el0); + isb(); +} + +/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */ +static inline unsigned long read_sel_evtyper(int sel) +{ + write_sysreg(sel, pmselr_el0); + isb(); + return read_sysreg(pmxevtyper_el0); +} + +/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */ +static inline void write_sel_evtyper(int sel, unsigned long val) +{ + write_sysreg(sel, pmselr_el0); + isb(); + write_sysreg(val, pmxevtyper_el0); + isb(); +} + +static inline void enable_counter(int idx) +{ + 
uint64_t v = read_sysreg(pmcntenset_el0); + + write_sysreg(BIT(idx) | v, pmcntenset_el0); + isb(); +} + +static inline void disable_counter(int idx) +{ + /* Write just BIT(idx): OR-ing in the currently enabled set */ + /* would disable every enabled counter, not only this one. */ + write_sysreg(BIT(idx), pmcntenclr_el0); + isb(); +} + +static void pmu_disable_reset(void) +{ + uint64_t pmcr = read_sysreg(pmcr_el0); + + /* Reset all counters, disabling them */ + pmcr &= ~ARMV8_PMU_PMCR_E; + write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0); + isb(); +} + +#define RETURN_READ_PMEVCNTRN(n) \ + return read_sysreg(pmevcntr##n##_el0) +static unsigned long read_pmevcntrn(int n) +{ + PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN); + return 0; +} + +#define WRITE_PMEVCNTRN(n) \ + write_sysreg(val, pmevcntr##n##_el0) +static void write_pmevcntrn(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVCNTRN); + isb(); +} + +#define READ_PMEVTYPERN(n) \ + return read_sysreg(pmevtyper##n##_el0) +static unsigned long read_pmevtypern(int n) +{ + PMEVN_SWITCH(n, READ_PMEVTYPERN); + return 0; +} + +#define WRITE_PMEVTYPERN(n) \ + write_sysreg(val, pmevtyper##n##_el0) +static void write_pmevtypern(int n, unsigned long val) +{ + PMEVN_SWITCH(n, WRITE_PMEVTYPERN); + isb(); +} + +/* + * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0 + * accessors that test cases will use. Each of the accessors will + * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0 + * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through + * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()). + * + * This is used to test that combinations of those accessors provide + * the consistent behavior.
+ */ +struct pmc_accessor { + /* A function to be used to read PMEVTCNTR<n>_EL0 */ + unsigned long (*read_cntr)(int idx); + /* A function to be used to write PMEVTCNTR<n>_EL0 */ + void (*write_cntr)(int idx, unsigned long val); + /* A function to be used to read PMEVTYPER<n>_EL0 */ + unsigned long (*read_typer)(int idx); + /* A function to be used to write PMEVTYPER<n>_EL0 */ + void (*write_typer)(int idx, unsigned long val); +}; + +struct pmc_accessor pmc_accessors[] = { + /* test with all direct accesses */ + { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern }, + /* test with all indirect accesses */ + { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper }, + /* read with direct accesses, and write with indirect accesses */ + { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper }, + /* read with indirect accesses, and write with direct accesses */ + { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern }, +}; + +/* + * Convert a pointer of pmc_accessor to an index in pmc_accessors[], + * assuming that the pointer is one of the entries in pmc_accessors[]. + */ +#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0]) + +#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \ +{ \ + uint64_t _tval = read_sysreg(regname); \ + \ + if (set_expected) \ + __GUEST_ASSERT((_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \ + _tval, mask, set_expected); \ + else \ + __GUEST_ASSERT(!(_tval & mask), \ + "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \ + _tval, mask, set_expected); \ +} + +/* + * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers + * are set or cleared as specified in @set_expected. 
+ */ +static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected) +{ + GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected); + GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected); +} + +/* + * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding + * to the specified counter (@pmc_idx) can be read/written as expected. + * When @set_op is true, it tries to set the bit for the counter in + * those registers by writing the SET registers (the bit won't be set + * if the counter is not implemented though). + * Otherwise, it tries to clear the bits in the registers by writing + * the CLR registers. + * Then, it checks if the values indicated in the registers are as expected. + */ +static void test_bitmap_pmu_regs(int pmc_idx, bool set_op) +{ + uint64_t pmcr_n, test_bit = BIT(pmc_idx); + bool set_expected = false; + + if (set_op) { + write_sysreg(test_bit, pmcntenset_el0); + write_sysreg(test_bit, pmintenset_el1); + write_sysreg(test_bit, pmovsset_el0); + + /* The bit will be set only if the counter is implemented */ + pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0)); + set_expected = (pmc_idx < pmcr_n) ? true : false; + } else { + write_sysreg(test_bit, pmcntenclr_el0); + write_sysreg(test_bit, pmintenclr_el1); + write_sysreg(test_bit, pmovsclr_el0); + } + check_bitmap_pmu_regs(test_bit, set_expected); +} + +/* + * Tests for reading/writing registers for the (implemented) event counter + * specified by @pmc_idx. + */ +static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + uint64_t write_data, read_data; + + /* Disable all PMCs and reset all PMCs to zero. */ + pmu_disable_reset(); + + /* + * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1. 
+ */ + + /* Make sure that the bit in those registers are set to 0 */ + test_bitmap_pmu_regs(pmc_idx, false); + /* Test if setting the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, true); + /* Test if clearing the bit in those registers works */ + test_bitmap_pmu_regs(pmc_idx, false); + + /* + * Tests for reading/writing the event type register. + */ + + /* + * Set the event type register to an arbitrary value just for testing + * of reading/writing the register. + * Arm ARM says that for the event from 0x0000 to 0x003F, + * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is + * the value written to the field even when the specified event + * is not supported. + */ + write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED); + acc->write_typer(pmc_idx, write_data); + read_data = acc->read_typer(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); + + /* + * Tests for reading/writing the event count register. 
+ */ + + read_data = acc->read_cntr(pmc_idx); + + /* The count value must be 0, as it is disabled and reset */ + __GUEST_ASSERT(read_data == 0, + "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data); + + write_data = read_data + pmc_idx + 0x12345; + acc->write_cntr(pmc_idx, write_data); + read_data = acc->read_cntr(pmc_idx); + __GUEST_ASSERT(read_data == write_data, + "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx", + pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data); +} + +#define INVALID_EC (-1ul) +uint64_t expected_ec = INVALID_EC; + +static void guest_sync_handler(struct ex_regs *regs) +{ + uint64_t esr, ec; + + esr = read_sysreg(esr_el1); + ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK; + + __GUEST_ASSERT(expected_ec == ec, + "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", + regs->pc, esr, ec, expected_ec); + + /* skip the trapping instruction */ + regs->pc += 4; + + /* Use INVALID_EC to indicate an exception occurred */ + expected_ec = INVALID_EC; +} + +/* + * Run the given operation that should trigger an exception with the + * given exception class. The exception handler (guest_sync_handler) + * will reset expected_ec to INVALID_EC and skip + * the instruction that trapped. + */ +#define TEST_EXCEPTION(ec, ops) \ +({ \ + GUEST_ASSERT(ec != INVALID_EC); \ + WRITE_ONCE(expected_ec, ec); \ + dsb(ish); \ + ops; \ + GUEST_ASSERT(expected_ec == INVALID_EC); \ +}) + +/* + * Tests for reading/writing registers for the unimplemented event counter + * specified by @pmc_idx (>= PMCR_EL0.N). + */ +static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) +{ + /* + * Reading/writing the event count/type registers should cause + * an UNDEFINED exception.
+ */ + TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + /* + * The bit corresponding to the (unimplemented) counter in + * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. + */ + test_bitmap_pmu_regs(pmc_idx, 1); + test_bitmap_pmu_regs(pmc_idx, 0); +} + +/* + * The guest is configured with PMUv3 with @expected_pmcr_n number of + * event counters. + * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and + * if reading/writing PMU registers for implemented or unimplemented + * counters works as expected. + */ +static void guest_code(uint64_t expected_pmcr_n) +{ + uint64_t pmcr, pmcr_n, unimp_mask; + int i, pmc; + + __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS, + "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%lx", + expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS); + + pmcr = read_sysreg(pmcr_el0); + pmcr_n = get_pmcr_n(pmcr); + + /* Make sure that PMCR_EL0.N indicates the value userspace set */ + __GUEST_ASSERT(pmcr_n == expected_pmcr_n, + "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx", + expected_pmcr_n, pmcr_n); + + /* + * Make sure that (RAZ) bits corresponding to unimplemented event + * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset + * to zero. + * (NOTE: bits for implemented event counters are reset to UNKNOWN) + */ + unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n); + check_bitmap_pmu_regs(unimp_mask, false); + + /* + * Tests for reading/writing PMU registers for implemented counters. + * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = 0; pmc < pmcr_n; pmc++) + test_access_pmc_regs(&pmc_accessors[i], pmc); + } + + /* + * Tests for reading/writing PMU registers for unimplemented counters. 
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions. + */ + for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) { + for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++) + test_access_invalid_pmc_regs(&pmc_accessors[i], pmc); + } + + GUEST_DONE(); +} + +#define GICD_BASE_GPA 0x8000000ULL +#define GICR_BASE_GPA 0x80A0000ULL + +/* Create a VM that has one vCPU with PMUv3 configured. */ +static void create_vpmu_vm(void *guest_code) +{ + struct kvm_vcpu_init init; + uint8_t pmuver, ec; + uint64_t dfr0, irq = 23; + struct kvm_device_attr irq_attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_IRQ, + .addr = (uint64_t)&irq, + }; + struct kvm_device_attr init_attr = { + .group = KVM_ARM_VCPU_PMU_V3_CTRL, + .attr = KVM_ARM_VCPU_PMU_V3_INIT, + }; + + /* The test creates the vpmu_vm multiple times. Ensure a clean state */ + memset(&vpmu_vm, 0, sizeof(vpmu_vm)); + + vpmu_vm.vm = vm_create(1); + vm_init_descriptor_tables(vpmu_vm.vm); + for (ec = 0; ec < ESR_EC_NUM; ec++) { + vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, + guest_sync_handler); + } + + /* Create vCPU with PMUv3 */ + vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); + vcpu_init_descriptor_tables(vpmu_vm.vcpu); + vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64, + GICD_BASE_GPA, GICR_BASE_GPA); + __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, + "Failed to create vgic-v3, skipping"); + + /* Make sure that PMUv3 support is indicated in the ID register */ + vcpu_get_reg(vpmu_vm.vcpu, + KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0); + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0); + TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && + pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP, + "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver); + + /* Initialize vPMU */ + vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, 
&irq_attr); + vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr); +} + +static void destroy_vpmu_vm(void) +{ + close(vpmu_vm.gic_fd); + kvm_vm_free(vpmu_vm.vm); +} + +static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n) +{ + struct ucall uc; + + vcpu_args_set(vcpu, 1, pmcr_n); + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) +{ + struct kvm_vcpu *vcpu; + uint64_t pmcr, pmcr_orig; + + create_vpmu_vm(guest_code); + vcpu = vpmu_vm.vcpu; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig); + pmcr = pmcr_orig; + + /* + * Setting a larger value of PMCR.N should not modify the field, and + * return a success. + */ + set_pmcr_n(&pmcr, pmcr_n); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + + if (expect_fail) + TEST_ASSERT(pmcr_orig == pmcr, + "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n", + pmcr, pmcr_n); + else + TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), + "Failed to update PMCR.N to %lu (received: %lu)\n", + pmcr_n, get_pmcr_n(pmcr)); +} + +/* + * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n, + * and run the test. + */ +static void run_access_test(uint64_t pmcr_n) +{ + uint64_t sp; + struct kvm_vcpu *vcpu; + struct kvm_vcpu_init init; + + pr_debug("Test with pmcr_n %lu\n", pmcr_n); + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + /* Save the initial sp to restore them later to run the guest again */ + vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp); + + run_vcpu(vcpu, pmcr_n); + + /* + * Reset and re-initialize the vCPU, and run the guest code again to + * check if PMCR_EL0.N is preserved. 
+ */ + vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); + aarch64_vcpu_setup(vcpu, &init); + vcpu_init_descriptor_tables(vcpu); + vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + + run_vcpu(vcpu, pmcr_n); + + destroy_vpmu_vm(); +} + +static struct pmreg_sets validity_check_reg_sets[] = { + PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0), + PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1), + PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0), +}; + +/* + * Create a VM, and check if KVM handles the userspace accesses of + * the PMU register sets in @validity_check_reg_sets[] correctly. + */ +static void run_pmregs_validity_test(uint64_t pmcr_n) +{ + int i; + struct kvm_vcpu *vcpu; + uint64_t set_reg_id, clr_reg_id, reg_val; + uint64_t valid_counters_mask, max_counters_mask; + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, false); + vcpu = vpmu_vm.vcpu; + + valid_counters_mask = get_counters_mask(pmcr_n); + max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS); + + for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) { + set_reg_id = validity_check_reg_sets[i].set_reg_id; + clr_reg_id = validity_check_reg_sets[i].clr_reg_id; + + /* + * Test if the 'set' and 'clr' variants of the registers + * are initialized based on the number of valid counters. 
+ */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + + /* + * Using the 'set' variant, force-set the register to the + * max number of possible counters and test if KVM discards + * the bits for unimplemented counters as it should. + */ + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask); + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + KVM_ARM64_SYS_REG(set_reg_id), reg_val); + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val); + TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, + "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + KVM_ARM64_SYS_REG(clr_reg_id), reg_val); + } + + destroy_vpmu_vm(); +} + +/* + * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for + * the vCPU to @pmcr_n, which is larger than the host value. + * The attempt should fail as @pmcr_n is too big to set for the vCPU. + */ +static void run_error_test(uint64_t pmcr_n) +{ + pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n); + + test_create_vpmu_vm_with_pmcr_n(pmcr_n, true); + destroy_vpmu_vm(); +} + +/* + * Return the default number of implemented PMU event counters excluding + * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */ +static uint64_t get_pmcr_n_limit(void) +{ + uint64_t pmcr; + + create_vpmu_vm(guest_code); + vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr); + destroy_vpmu_vm(); + return get_pmcr_n(pmcr); +} + +int main(void) +{ + uint64_t i, pmcr_n; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3)); + + pmcr_n = get_pmcr_n_limit(); + for (i = 0; i <= pmcr_n; i++) { + run_access_test(i); + run_pmregs_validity_test(i); + } + + for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++) + run_error_test(i); + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index cb537253a6b9..c42d683102c7 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -104,6 +104,7 @@ enum { #define ESR_EC_SHIFT 26 #define ESR_EC_MASK (ESR_EC_NUM - 1) +#define ESR_EC_UNKNOWN 0x0 #define ESR_EC_SVC64 0x15 #define ESR_EC_IABT 0x21 #define ESR_EC_DABT 0x25 diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 7e614adc6cf4..8e5f413a593d 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -33,7 +33,7 @@ static inline int _no_printf(const char *format, ...) { return 0; } #define pr_info(...) _no_printf(__VA_ARGS__) #endif -void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void __printf(1, 2) print_skip(const char *fmt, ...); #define __TEST_REQUIRE(f, fmt, ...) \ do { \ if (!(f)) \ @@ -46,9 +46,9 @@ ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); int test_seq_read(const char *path, char **bufp, size_t *sizep); -void test_assert(bool exp, const char *exp_str, - const char *file, unsigned int line, const char *fmt, ...) 
- __attribute__((format(printf, 5, 6))); +void __printf(5, 6) test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, + const char *fmt, ...); #define TEST_ASSERT(e, fmt, ...) \ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 112bc1da732a..ce33d306c2cb 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UCALL_COMMON_H diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 4fd042112526..25bc61dac5fb 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -68,6 +68,12 @@ struct xstate { #define XFEATURE_MASK_OPMASK BIT_ULL(5) #define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6) #define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7) +#define XFEATURE_MASK_PT BIT_ULL(8) +#define XFEATURE_MASK_PKRU BIT_ULL(9) +#define XFEATURE_MASK_PASID BIT_ULL(10) +#define XFEATURE_MASK_CET_USER BIT_ULL(11) +#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12) +#define XFEATURE_MASK_LBR BIT_ULL(15) #define XFEATURE_MASK_XTILE_CFG BIT_ULL(17) #define XFEATURE_MASK_XTILE_DATA BIT_ULL(18) @@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature { #define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) #define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) #define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4) #define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) #define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) #define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30) @@ 
-553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value) __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index)); } +static inline void wrpkru(u32 pkru) +{ + /* Note, ECX and EDX are architecturally required to be '0'. */ + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (pkru), "c"(0), "d"(0)); +} + static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; @@ -908,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature) !kvm_cpu_has(feature.anti_feature); } +static __always_inline uint64_t kvm_cpu_supported_xcr0(void) +{ + if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO)) + return 0; + + return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) | + ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); +} + static inline size_t kvm_cpuid2_size(int nr_entries) { return sizeof(struct kvm_cpuid2) + diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 3a0259e25335..6fe12e985ba5 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -518,9 +518,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf; - *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0; - *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0; + *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf; + *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0; + *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0; close(vcpu_fd); close(vm_fd); diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c index c4a69d8aeb68..74627514c4d4 100644 --- 
a/tools/testing/selftests/kvm/lib/guest_sprintf.c +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -200,6 +200,13 @@ repeat: ++fmt; } + /* + * Play nice with %llu, %llx, etc. KVM selftests only support + * 64-bit builds, so just treat %ll* the same as %l*. + */ + if (qualifier == 'l' && *fmt == 'l') + ++fmt; + /* default base */ base = 10; diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c index 7168e25c194e..89153a333e83 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/apic.c +++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/lib/x86_64/processor.c - * * Copyright (C) 2021, Google LLC. */ diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 20eb2e730800..8698d1ab60d0 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data, struct test_result *rbestruntime) { uint64_t maxslots; - struct test_result result; + struct test_result result = {}; - result.nloops = 0; if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, &result.nloops, &result.slot_runtime, &result.guest_runtime)) { @@ -1089,7 +1088,7 @@ int main(int argc, char *argv[]) .seconds = 5, .runs = 1, }; - struct test_result rbestslottime; + struct test_result rbestslottime = {}; int tctr; if (!check_memory_sizes()) @@ -1098,11 +1097,10 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv, &targs)) return -1; - rbestslottime.slottimens = 0; for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { const struct test_data *data = &tests[tctr]; unsigned int runctr; - struct test_result rbestruntime; + struct test_result rbestruntime = {}; if (tctr > targs.tfirst) pr_info("\n"); @@ -1110,7 +1108,6 @@ int main(int argc, char *argv[]) 
pr_info("Testing %s performance with %i runs, %d seconds each\n", data->name, targs.runs, targs.seconds); - rbestruntime.runtimens = 0; for (runctr = 0; runctr < targs.runs; runctr++) if (!test_loop(data, &targs, &rbestslottime, &rbestruntime)) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 9f99ea42f45f..6bedaea95395 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -25,6 +25,8 @@ bool filter_reg(__u64 reg) * the visibility of the ISA_EXT register itself. * * Based on above, we should filter-out all ISA_EXT registers. + * + * Note: The below list is alphabetically sorted. */ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C: @@ -33,21 +35,23 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | 
KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: return true; /* AIA registers are always available when Ssaia can't be disabled */ @@ -112,11 +116,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) } } -static const char *config_id_to_str(__u64 id) +static const char *config_id_to_str(const char *prefix, __u64 id) { /* reg_off is the offset into struct kvm_riscv_config */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG); + switch (reg_off) { case KVM_REG_RISCV_CONFIG_REG(isa): return "KVM_REG_RISCV_CONFIG_REG(isa)"; @@ -134,11 +140,7 @@ static const char *config_id_to_str(__u64 id) return "KVM_REG_RISCV_CONFIG_REG(satp_mode)"; } - /* - * Config regs would grow regularly with new pseudo reg added, so - * just show raw id to indicate a new pseudo config reg. 
- */ - return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off); + return strdup_printf("%lld /* UNKNOWN */", reg_off); } static const char *core_id_to_str(const char *prefix, __u64 id) @@ -146,6 +148,8 @@ static const char *core_id_to_str(const char *prefix, __u64 id) /* reg_off is the offset into struct kvm_riscv_core */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE); + switch (reg_off) { case KVM_REG_RISCV_CORE_REG(regs.pc): return "KVM_REG_RISCV_CORE_REG(regs.pc)"; @@ -176,14 +180,15 @@ static const char *core_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_CORE_REG(mode)"; } - TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); - return NULL; + return strdup_printf("%lld /* UNKNOWN */", reg_off); } #define RISCV_CSR_GENERAL(csr) \ "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")" #define RISCV_CSR_AIA(csr) \ "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")" +#define RISCV_CSR_SMSTATEEN(csr) \ + "KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")" static const char *general_csr_id_to_str(__u64 reg_off) { @@ -209,10 +214,11 @@ static const char *general_csr_id_to_str(__u64 reg_off) return RISCV_CSR_GENERAL(satp); case KVM_REG_RISCV_CSR_REG(scounteren): return RISCV_CSR_GENERAL(scounteren); + case KVM_REG_RISCV_CSR_REG(senvcfg): + return RISCV_CSR_GENERAL(senvcfg); } - TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off); - return NULL; + return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off); } static const char *aia_csr_id_to_str(__u64 reg_off) @@ -235,7 +241,18 @@ static const char *aia_csr_id_to_str(__u64 reg_off) return RISCV_CSR_AIA(iprio2h); } - TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off); + return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off); +} + +static const char *smstateen_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct 
kvm_riscv_smstateen_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0): + return RISCV_CSR_SMSTATEEN(sstateen0); + } + + TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off); return NULL; } @@ -244,6 +261,8 @@ static const char *csr_id_to_str(const char *prefix, __u64 id) __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR); __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR); + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; switch (reg_subtype) { @@ -251,10 +270,11 @@ static const char *csr_id_to_str(const char *prefix, __u64 id) return general_csr_id_to_str(reg_off); case KVM_REG_RISCV_CSR_AIA: return aia_csr_id_to_str(reg_off); + case KVM_REG_RISCV_CSR_SMSTATEEN: + return smstateen_csr_id_to_str(reg_off); } - TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype); - return NULL; + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); } static const char *timer_id_to_str(const char *prefix, __u64 id) @@ -262,6 +282,8 @@ static const char *timer_id_to_str(const char *prefix, __u64 id) /* reg_off is the offset into struct kvm_riscv_timer */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER); + switch (reg_off) { case KVM_REG_RISCV_TIMER_REG(frequency): return "KVM_REG_RISCV_TIMER_REG(frequency)"; @@ -273,8 +295,7 @@ static const char *timer_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_TIMER_REG(state)"; } - TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id); - return NULL; + return strdup_printf("%lld /* UNKNOWN */", reg_off); } static const char *fp_f_id_to_str(const char *prefix, __u64 id) @@ -282,6 +303,8 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id) /* reg_off is the offset into struct __riscv_f_ext_state */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F); + 
switch (reg_off) { case KVM_REG_RISCV_FP_F_REG(f[0]) ... KVM_REG_RISCV_FP_F_REG(f[31]): @@ -290,8 +313,7 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_FP_F_REG(fcsr)"; } - TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id); - return NULL; + return strdup_printf("%lld /* UNKNOWN */", reg_off); } static const char *fp_d_id_to_str(const char *prefix, __u64 id) @@ -299,6 +321,8 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) /* reg_off is the offset into struct __riscv_d_ext_state */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D); + switch (reg_off) { case KVM_REG_RISCV_FP_D_REG(f[0]) ... KVM_REG_RISCV_FP_D_REG(f[31]): @@ -307,96 +331,93 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) return "KVM_REG_RISCV_FP_D_REG(fcsr)"; } - TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id); - return NULL; + return strdup_printf("%lld /* UNKNOWN */", reg_off); } -static const char *isa_ext_id_to_str(__u64 id) +#define KVM_ISA_EXT_ARR(ext) \ +[KVM_RISCV_ISA_EXT_##ext] = "KVM_RISCV_ISA_EXT_" #ext + +static const char *isa_ext_id_to_str(const char *prefix, __u64 id) { /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT); + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT); + static const char * const kvm_isa_ext_reg_name[] = { - "KVM_RISCV_ISA_EXT_A", - "KVM_RISCV_ISA_EXT_C", - "KVM_RISCV_ISA_EXT_D", - "KVM_RISCV_ISA_EXT_F", - "KVM_RISCV_ISA_EXT_H", - "KVM_RISCV_ISA_EXT_I", - "KVM_RISCV_ISA_EXT_M", - "KVM_RISCV_ISA_EXT_SVPBMT", - "KVM_RISCV_ISA_EXT_SSTC", - "KVM_RISCV_ISA_EXT_SVINVAL", - "KVM_RISCV_ISA_EXT_ZIHINTPAUSE", - "KVM_RISCV_ISA_EXT_ZICBOM", - "KVM_RISCV_ISA_EXT_ZICBOZ", - "KVM_RISCV_ISA_EXT_ZBB", - "KVM_RISCV_ISA_EXT_SSAIA", - "KVM_RISCV_ISA_EXT_V", - "KVM_RISCV_ISA_EXT_SVNAPOT", - "KVM_RISCV_ISA_EXT_ZBA", - "KVM_RISCV_ISA_EXT_ZBS", - 
"KVM_RISCV_ISA_EXT_ZICNTR", - "KVM_RISCV_ISA_EXT_ZICSR", - "KVM_RISCV_ISA_EXT_ZIFENCEI", - "KVM_RISCV_ISA_EXT_ZIHPM", + KVM_ISA_EXT_ARR(A), + KVM_ISA_EXT_ARR(C), + KVM_ISA_EXT_ARR(D), + KVM_ISA_EXT_ARR(F), + KVM_ISA_EXT_ARR(H), + KVM_ISA_EXT_ARR(I), + KVM_ISA_EXT_ARR(M), + KVM_ISA_EXT_ARR(V), + KVM_ISA_EXT_ARR(SMSTATEEN), + KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVNAPOT), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZBA), + KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZICBOM), + KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICNTR), + KVM_ISA_EXT_ARR(ZICOND), + KVM_ISA_EXT_ARR(ZICSR), + KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZIHPM), }; - if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) { - /* - * isa_ext regs would grow regularly with new isa extension added, so - * just show "reg" to indicate a new extension. - */ + if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) return strdup_printf("%lld /* UNKNOWN */", reg_off); - } return kvm_isa_ext_reg_name[reg_off]; } +#define KVM_SBI_EXT_ARR(ext) \ +[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext + static const char *sbi_ext_single_id_to_str(__u64 reg_off) { /* reg_off is KVM_RISCV_SBI_EXT_ID */ static const char * const kvm_sbi_ext_reg_name[] = { - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL", - "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR", + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE), + 
KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), }; - if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) { - /* - * sbi_ext regs would grow regularly with new sbi extension added, so - * just show "reg" to indicate a new extension. - */ + if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off); - } return kvm_sbi_ext_reg_name[reg_off]; } static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) { - if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) { - /* - * sbi_ext regs would grow regularly with new sbi extension added, so - * just show "reg" to indicate a new extension. - */ - return strdup_printf("%lld /* UNKNOWN */", reg_off); - } + const char *unknown = ""; + + if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) + unknown = " /* UNKNOWN */"; switch (reg_subtype) { case KVM_REG_RISCV_SBI_MULTI_EN: - return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off); + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown); case KVM_REG_RISCV_SBI_MULTI_DIS: - return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off); + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown); } - return NULL; + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); } static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) @@ -404,6 +425,8 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT); __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT); + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; switch (reg_subtype) { @@ -414,8 +437,7 @@ static const char 
*sbi_ext_id_to_str(const char *prefix, __u64 id) return sbi_ext_multi_id_to_str(reg_subtype, reg_off); } - TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype); - return NULL; + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); } void print_reg(const char *prefix, __u64 id) @@ -436,14 +458,14 @@ void print_reg(const char *prefix, __u64 id) reg_size = "KVM_REG_SIZE_U128"; break; default: - TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", - prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & REG_MASK); } switch (id & KVM_REG_RISCV_TYPE_MASK) { case KVM_REG_RISCV_CONFIG: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n", - reg_size, config_id_to_str(id)); + reg_size, config_id_to_str(prefix, id)); break; case KVM_REG_RISCV_CORE: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n", @@ -467,15 +489,15 @@ void print_reg(const char *prefix, __u64 id) break; case KVM_REG_RISCV_ISA_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", - reg_size, isa_ext_id_to_str(id)); + reg_size, isa_ext_id_to_str(prefix, id)); break; case KVM_REG_RISCV_SBI_EXT: printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n", reg_size, sbi_ext_id_to_str(prefix, id)); break; default: - TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix, - (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id); + printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,", + reg_size, id & REG_MASK); } } @@ -532,6 +554,7 @@ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | 
KVM_REG_RISCV_CSR_REG(scounteren), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), @@ -545,6 +568,7 @@ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0, KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0, }; @@ -603,6 +627,10 @@ static __u64 zicntr_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR, }; +static __u64 zicond_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND, +}; + static __u64 zicsr_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR, }; @@ -626,6 +654,11 @@ static __u64 aia_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA, }; +static __u64 smstateen_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN, +}; + static __u64 fp_f_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F 
| KVM_REG_RISCV_FP_F_REG(f[0]), KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]), @@ -725,6 +758,8 @@ static __u64 fp_d_regs[] = { {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),} #define ZICNTR_REGS_SUBLIST \ {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),} +#define ZICOND_REGS_SUBLIST \ + {"zicond", .feature = KVM_RISCV_ISA_EXT_ZICOND, .regs = zicond_regs, .regs_n = ARRAY_SIZE(zicond_regs),} #define ZICSR_REGS_SUBLIST \ {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),} #define ZIFENCEI_REGS_SUBLIST \ @@ -733,6 +768,8 @@ static __u64 fp_d_regs[] = { {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),} #define AIA_REGS_SUBLIST \ {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),} +#define SMSTATEEN_REGS_SUBLIST \ + {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),} #define FP_F_REGS_SUBLIST \ {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \ .regs_n = ARRAY_SIZE(fp_f_regs),} @@ -828,6 +865,14 @@ static struct vcpu_reg_list zicntr_config = { }, }; +static struct vcpu_reg_list zicond_config = { + .sublists = { + BASE_SUBLIST, + ZICOND_REGS_SUBLIST, + {0}, + }, +}; + static struct vcpu_reg_list zicsr_config = { .sublists = { BASE_SUBLIST, @@ -860,6 +905,14 @@ static struct vcpu_reg_list aia_config = { }, }; +static struct vcpu_reg_list smstateen_config = { + .sublists = { + BASE_SUBLIST, + SMSTATEEN_REGS_SUBLIST, + {0}, + }, +}; + static struct vcpu_reg_list fp_f_config = { .sublists = { BASE_SUBLIST, @@ -888,10 +941,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &zbb_config, &zbs_config, &zicntr_config, + &zicond_config, &zicsr_config, &zifencei_config, &zihpm_config, &aia_config, + &smstateen_config, &fp_f_config, 
&fp_d_config, }; diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c new file mode 100644 index 000000000000..df351ae17029 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023, Google LLC. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit) +{ + const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8); + const uint64_t valid = BIT_ULL(18) | BIT_ULL(24); + const uint64_t legal = ignored | valid; + uint64_t val = BIT_ULL(bit); + uint64_t actual; + int r; + + r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val); + TEST_ASSERT(val & ~legal ? !r : r == 1, + "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s", + val, val & ~legal ? "fail" : "succeed"); + + actual = vcpu_get_msr(vcpu, MSR_K7_HWCR); + TEST_ASSERT(actual == (val & valid), + "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx", + bit, actual, (val & valid)); + + vcpu_set_msr(vcpu, MSR_K7_HWCR, 0); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + unsigned int bit; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + for (bit = 0; bit < BITS_PER_LONG; bit++) + test_hwcr_bit(vcpu, bit); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index e446d76d1c0c..6c1278562090 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * KVM_GET/SET_* tests - * * Copyright (C) 2022, Red Hat, Inc. * * Tests for Hyper-V extensions to SVM. 
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 7f36c32fa760..18ac5c1952a3 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * tools/testing/selftests/kvm/nx_huge_page_test.c - * * Usage: to be run via nx_huge_page_test.sh, which does the necessary * environment setup and teardown * diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 0560149e66ed..7cbb409801ee 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -4,7 +4,6 @@ # Wrapper script which performs setup and cleanup for nx_huge_pages_test. # Makes use of root privileges to set up huge pages and KVM module parameters. # -# tools/testing/selftests/kvm/nx_huge_page_test.sh # Copyright (C) 2022, Google LLC. set -e diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4c4925a8ab45..88b58aab7207 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) static void __attribute__((__flatten__)) guest_code(void *arg) { GUEST_SYNC(1); + + if (this_cpu_has(X86_FEATURE_XSAVE)) { + uint64_t supported_xcr0 = this_cpu_supported_xcr0(); + uint8_t buffer[4096]; + + memset(buffer, 0xcc, sizeof(buffer)); + + set_cr4(get_cr4() | X86_CR4_OSXSAVE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + xsetbv(0, xgetbv(0) | supported_xcr0); + + /* + * Modify state for all supported xfeatures to take them out of + * their "init" state, i.e. to make them show up in XSTATE_BV. + * + * Note off-by-default features, e.g. 
AMX, are out of scope for + * this particular testcase as they have a different ABI. + */ + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); + asm volatile ("fincstp"); + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); + asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_YMM) + asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); + + if (supported_xcr0 & XFEATURE_MASK_AVX512) { + asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); + asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); + asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); + } + + if (this_cpu_has(X86_FEATURE_MPX)) { + uint64_t bounds[2] = { 10, 0xffffffffull }; + uint64_t output[2] = { }; + + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); + + /* + * Don't bother trying to get BNDCSR into the INUSE + * state. MSR_IA32_BNDCFGS doesn't count as it isn't + * managed via XSAVE/XRSTOR, and BNDCFGU can only be + * modified by XRSTOR. Stuffing XSTATE_BV in the host + * is simpler than doing XRSTOR here in the guest. + * + * However, temporarily enable MPX in BNDCFGS so that + * BNDMOV actually loads BND1. If MPX isn't *fully* + * enabled, all MPX instructions are treated as NOPs. + * + * Hand encode "bndmov (%rax),%bnd1" as support for MPX + * mnemonics/registers has been removed from gcc and + * clang (and was never fully supported by clang). + */ + wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); + asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); + /* + * Hand encode "bndmov %bnd1, (%rax)" to sanity check + * that BND1 actually got loaded. 
+ */ + asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); + wrmsr(MSR_IA32_BNDCFGS, 0); + + GUEST_ASSERT_EQ(bounds[0], output[0]); + GUEST_ASSERT_EQ(bounds[1], output[1]); + } + if (this_cpu_has(X86_FEATURE_PKU)) { + GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); + set_cr4(get_cr4() | X86_CR4_PKE); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); + + wrpkru(-1u); + } + } + GUEST_SYNC(2); if (arg) { @@ -153,10 +230,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg) int main(int argc, char *argv[]) { + uint64_t *xstate_bv, saved_xstate_bv; vm_vaddr_t nested_gva = 0; - + struct kvm_cpuid2 empty_cpuid = {}; struct kvm_regs regs1, regs2; - struct kvm_vcpu *vcpu; + struct kvm_vcpu *vcpu, *vcpuN; struct kvm_vm *vm; struct kvm_x86_state *state; struct ucall uc; @@ -209,6 +287,34 @@ int main(int argc, char *argv[]) /* Restore state in a new VM. */ vcpu = vm_recreate_with_one_vcpu(vm); vcpu_load_state(vcpu, state); + + /* + * Restore XSAVE state in a dummy vCPU, first without doing + * KVM_SET_CPUID2, and then with an empty guest CPUID. Except + * for off-by-default xfeatures, e.g. AMX, KVM is supposed to + * allow KVM_SET_XSAVE regardless of guest CPUID. Manually + * load only XSAVE state, MSRs in particular have a much more + * convoluted ABI. + * + * Load two versions of XSAVE state: one with the actual guest + * XSAVE state, and one with all supported features forced "on" + * in xstate_bv, e.g. to ensure that KVM allows loading all + * supported features, even if something goes awry in saving + * the original snapshot. 
+ */ + xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; + saved_xstate_bv = *xstate_bv; + + vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = kvm_cpu_supported_xcr0(); + vcpu_xsave_set(vcpuN, state->xsave); + + vcpu_init_cpuid(vcpuN, &empty_cpuid); + vcpu_xsave_set(vcpuN, state->xsave); + *xstate_bv = saved_xstate_bv; + vcpu_xsave_set(vcpuN, state->xsave); + kvm_x86_state_cleanup(state); memset(&regs2, 0, sizeof(regs2)); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index 5b669818e39a..59c7304f805e 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. - * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 05898ad9f4d9..9ec9ab60b63e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -1,10 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * svm_vmcall_test - * * Copyright © 2021 Amazon.com, Inc. or its affiliates. 
- * - * Xen shared_info / pvclock testing */ #include "test_util.h" diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 792c3f0a59b4..646f778dfb1e 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -75,7 +75,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(3, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(4, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index d7987ae8d7fc..5b79758cae62 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -112,10 +112,13 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) cap_t cap_p; /* Only these three capabilities are useful for the tests. 
*/ const cap_value_t caps[] = { + /* clang-format off */ CAP_DAC_OVERRIDE, CAP_MKNOD, CAP_SYS_ADMIN, CAP_SYS_CHROOT, + CAP_NET_BIND_SERVICE, + /* clang-format on */ }; cap_p = cap_get_proc(); @@ -256,3 +259,13 @@ static int __maybe_unused send_fd(int usock, int fd_tx) return -errno; return 0; } + +static void __maybe_unused +enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) +{ + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) + { + TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); + } +} diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 3dc9e438eab1..0086efaa7b68 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,5 +1,9 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NET=y +CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y CONFIG_PROC_FS=y CONFIG_SECURITY=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 251594306d40..18e1f86a6234 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -677,16 +677,6 @@ static int create_ruleset(struct __test_metadata *const _metadata, return ruleset_fd; } -static void enforce_ruleset(struct __test_metadata *const _metadata, - const int ruleset_fd) -{ - ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); - ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) - { - TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); - } -} - TEST_F_FORK(layout0, proc_nsfs) { const struct rule rules[] = { @@ -1635,6 +1625,65 @@ TEST_F_FORK(layout1, move_mount) clear_cap(_metadata, CAP_SYS_ADMIN); } +TEST_F_FORK(layout1, topology_changes_with_net_only) +{ + const struct landlock_ruleset_attr ruleset_net = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + int ruleset_fd; + 
+ /* Add network restrictions. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_net, sizeof(ruleset_net), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Mount, remount, move_mount, umount, and pivot_root checks. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s1d2)); + ASSERT_EQ(0, mount(NULL, dir_s1d2, NULL, MS_PRIVATE | MS_REC, NULL)); + ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, + dir_s2d2, 0)); + ASSERT_EQ(0, umount(dir_s2d2)); + ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3)); + ASSERT_EQ(0, chdir("/")); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +TEST_F_FORK(layout1, topology_changes_with_net_and_fs) +{ + const struct landlock_ruleset_attr ruleset_net_fs = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + int ruleset_fd; + + /* Add network and filesystem restrictions. */ + ruleset_fd = landlock_create_ruleset(&ruleset_net_fs, + sizeof(ruleset_net_fs), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Mount, remount, move_mount, umount, and pivot_root checks. 
*/ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(-1, mount_opt(&mnt_tmp, dir_s1d2)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_PRIVATE | MS_REC, NULL)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, + dir_s2d2, 0)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, umount(dir_s3d2)); + ASSERT_EQ(EPERM, errno); + ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3)); + ASSERT_EQ(EPERM, errno); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + TEST_F_FORK(layout1, release_inodes) { const struct rule rules[] = { diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c new file mode 100644 index 000000000000..929e21c4db05 --- /dev/null +++ b/tools/testing/selftests/landlock/net_test.c @@ -0,0 +1,1738 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Landlock tests - Network + * + * Copyright © 2022-2023 Huawei Tech. Co., Ltd. + * Copyright © 2023 Microsoft Corporation + */ + +#define _GNU_SOURCE +#include <arpa/inet.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/landlock.h> +#include <linux/in.h> +#include <sched.h> +#include <stdint.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "common.h" + +const short sock_port_start = (1 << 10); /* First service port; set_service() shifts it left by 2 * index. */ + +static const char loopback_ipv4[] = "127.0.0.1"; +static const char loopback_ipv6[] = "::1"; + +/* Maximum number of pending connections, passed as the listen() backlog. */ +const short backlog = 10; + +enum sandbox_type { + NO_SANDBOX, + /* This may be used to test rules that allow *and* deny accesses. 
*/ + TCP_SANDBOX, +}; + +struct protocol_variant { + int domain; + int type; +}; + +struct service_fixture { + struct protocol_variant protocol; + /* port is also stored in ipv4_addr.sin_port or ipv6_addr.sin6_port */ + unsigned short port; + union { + struct sockaddr_in ipv4_addr; + struct sockaddr_in6 ipv6_addr; + struct { + struct sockaddr_un unix_addr; + socklen_t unix_addr_len; + }; + }; +}; + +static int set_service(struct service_fixture *const srv, + const struct protocol_variant prot, + const unsigned short index) +{ + memset(srv, 0, sizeof(*srv)); + + /* + * Copies all protocol properties in case the variant only contains + * a subset of them. + */ + srv->protocol = prot; + + /* Checks for port overflow: only indexes 0 to 2 fit in 16 bits. */ + if (index > 2) + return 1; + srv->port = sock_port_start << (2 * index); + + switch (prot.domain) { + case AF_UNSPEC: + case AF_INET: + srv->ipv4_addr.sin_family = prot.domain; + srv->ipv4_addr.sin_port = htons(srv->port); + srv->ipv4_addr.sin_addr.s_addr = inet_addr(loopback_ipv4); + return 0; + + case AF_INET6: + srv->ipv6_addr.sin6_family = prot.domain; + srv->ipv6_addr.sin6_port = htons(srv->port); + inet_pton(AF_INET6, loopback_ipv6, &srv->ipv6_addr.sin6_addr); + return 0; + + case AF_UNIX: + srv->unix_addr.sun_family = prot.domain; + sprintf(srv->unix_addr.sun_path, + "_selftests-landlock-net-tid%d-index%d", gettid(), + index); + srv->unix_addr_len = SUN_LEN(&srv->unix_addr); + srv->unix_addr.sun_path[0] = '\0'; /* Leading NUL byte: abstract socket address, see unix(7). */ + return 0; + } + return 1; +} + +static void setup_loopback(struct __test_metadata *const _metadata) +{ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, unshare(CLONE_NEWNET)); + ASSERT_EQ(0, system("ip link set dev lo up")); + clear_cap(_metadata, CAP_SYS_ADMIN); +} + +static bool is_restricted(const struct protocol_variant *const prot, + const enum sandbox_type sandbox) +{ + switch (prot->domain) { + case AF_INET: + case AF_INET6: + switch (prot->type) { + case SOCK_STREAM: + return sandbox == TCP_SANDBOX; + } + break; + } + 
return false; +} + +static int socket_variant(const struct service_fixture *const srv) +{ + int ret; + + ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, + 0); + if (ret < 0) + return -errno; + return ret; +} + +#ifndef SIN6_LEN_RFC2133 +#define SIN6_LEN_RFC2133 24 +#endif + +static socklen_t get_addrlen(const struct service_fixture *const srv, + const bool minimal) +{ + switch (srv->protocol.domain) { + case AF_UNSPEC: + case AF_INET: + return sizeof(srv->ipv4_addr); + + case AF_INET6: + if (minimal) + return SIN6_LEN_RFC2133; + return sizeof(srv->ipv6_addr); + + case AF_UNIX: + if (minimal) + return sizeof(srv->unix_addr) - + sizeof(srv->unix_addr.sun_path); + return srv->unix_addr_len; + + default: + return 0; + } +} + +static void set_port(struct service_fixture *const srv, uint16_t port) +{ + switch (srv->protocol.domain) { + case AF_UNSPEC: + case AF_INET: + srv->ipv4_addr.sin_port = htons(port); + return; + + case AF_INET6: + srv->ipv6_addr.sin6_port = htons(port); + return; + + default: + return; + } +} + +static uint16_t get_binded_port(int socket_fd, + const struct protocol_variant *const prot) +{ + struct sockaddr_in ipv4_addr; + struct sockaddr_in6 ipv6_addr; + socklen_t ipv4_addr_len, ipv6_addr_len; + + /* Gets binded port. 
*/ + switch (prot->domain) { + case AF_UNSPEC: + case AF_INET: + ipv4_addr_len = sizeof(ipv4_addr); + getsockname(socket_fd, &ipv4_addr, &ipv4_addr_len); + return ntohs(ipv4_addr.sin_port); + + case AF_INET6: + ipv6_addr_len = sizeof(ipv6_addr); + getsockname(socket_fd, &ipv6_addr, &ipv6_addr_len); + return ntohs(ipv6_addr.sin6_port); + + default: + return 0; + } +} + +static int bind_variant_addrlen(const int sock_fd, + const struct service_fixture *const srv, + const socklen_t addrlen) +{ + int ret; + + switch (srv->protocol.domain) { + case AF_UNSPEC: + case AF_INET: + ret = bind(sock_fd, &srv->ipv4_addr, addrlen); + break; + + case AF_INET6: + ret = bind(sock_fd, &srv->ipv6_addr, addrlen); + break; + + case AF_UNIX: + ret = bind(sock_fd, &srv->unix_addr, addrlen); + break; + + default: + errno = EAFNOSUPPORT; + return -errno; + } + + if (ret < 0) + return -errno; + return ret; +} + +static int bind_variant(const int sock_fd, + const struct service_fixture *const srv) +{ + return bind_variant_addrlen(sock_fd, srv, get_addrlen(srv, false)); +} + +static int connect_variant_addrlen(const int sock_fd, + const struct service_fixture *const srv, + const socklen_t addrlen) +{ + int ret; + + switch (srv->protocol.domain) { + case AF_UNSPEC: + case AF_INET: + ret = connect(sock_fd, &srv->ipv4_addr, addrlen); + break; + + case AF_INET6: + ret = connect(sock_fd, &srv->ipv6_addr, addrlen); + break; + + case AF_UNIX: + ret = connect(sock_fd, &srv->unix_addr, addrlen); + break; + + default: + errno = EAFNOSUPPORT; /* errno stays positive; -errno is returned, as in bind_variant_addrlen(). */ + return -errno; + } + + if (ret < 0) + return -errno; + return ret; +} + +static int connect_variant(const int sock_fd, + const struct service_fixture *const srv) +{ + return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false)); +} + +FIXTURE(protocol) +{ + struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0; +}; + +FIXTURE_VARIANT(protocol) +{ + const enum sandbox_type sandbox; + const struct protocol_variant prot; +}; + 
+FIXTURE_SETUP(protocol) +{ + const struct protocol_variant prot_unspec = { + .domain = AF_UNSPEC, + .type = SOCK_STREAM, + }; + + disable_caps(_metadata); + + ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0)); + ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1)); + ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2)); + + ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0)); + + ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0)); + self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(protocol) +{ +} + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_DGRAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_DGRAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_UNIX, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_UNIX, + .type = SOCK_DGRAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = 
SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_DGRAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_DGRAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_UNIX, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_UNIX, + .type = SOCK_DGRAM, + }, +}; + +static void test_bind_and_connect(struct __test_metadata *const _metadata, + const struct service_fixture *const srv, + const bool deny_bind, const bool deny_connect) +{ + char buf = '\0'; + int inval_fd, bind_fd, client_fd, status, ret; + pid_t child; + + /* Starts invalid addrlen tests with bind. */ + inval_fd = socket_variant(srv); + ASSERT_LE(0, inval_fd) + { + TH_LOG("Failed to create socket: %s", strerror(errno)); + } + + /* Tries to bind with zero as addrlen. */ + EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, 0)); + + /* Tries to bind with too small addrlen. */ + EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, + get_addrlen(srv, true) - 1)); + + /* Tries to bind with minimal addrlen. 
*/ + ret = bind_variant_addrlen(inval_fd, srv, get_addrlen(srv, true)); + if (deny_bind) { + EXPECT_EQ(-EACCES, ret); + } else { + EXPECT_EQ(0, ret) + { + TH_LOG("Failed to bind to socket: %s", strerror(errno)); + } + } + EXPECT_EQ(0, close(inval_fd)); + + /* Starts invalid addrlen tests with connect. */ + inval_fd = socket_variant(srv); + ASSERT_LE(0, inval_fd); + + /* Tries to connect with zero as addrlen. */ + EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, 0)); + + /* Tries to connect with too small addrlen. */ + EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, + get_addrlen(srv, true) - 1)); + + /* Tries to connect with minimal addrlen. */ + ret = connect_variant_addrlen(inval_fd, srv, get_addrlen(srv, true)); + if (srv->protocol.domain == AF_UNIX) { + EXPECT_EQ(-EINVAL, ret); + } else if (deny_connect) { + EXPECT_EQ(-EACCES, ret); + } else if (srv->protocol.type == SOCK_STREAM) { + /* No listening server, whatever the value of deny_bind. */ + EXPECT_EQ(-ECONNREFUSED, ret); + } else { + EXPECT_EQ(0, ret) + { + TH_LOG("Failed to connect to socket: %s", + strerror(errno)); + } + } + EXPECT_EQ(0, close(inval_fd)); + + /* Starts connection tests. */ + bind_fd = socket_variant(srv); + ASSERT_LE(0, bind_fd); + + ret = bind_variant(bind_fd, srv); + if (deny_bind) { + EXPECT_EQ(-EACCES, ret); + } else { + EXPECT_EQ(0, ret); + + /* Creates a listening socket. */ + if (srv->protocol.type == SOCK_STREAM) + EXPECT_EQ(0, listen(bind_fd, backlog)); + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + int connect_fd, ret; + + /* Closes listening socket for the child. */ + EXPECT_EQ(0, close(bind_fd)); + + /* Starts connection tests. */ + connect_fd = socket_variant(srv); + ASSERT_LE(0, connect_fd); + ret = connect_variant(connect_fd, srv); + if (deny_connect) { + EXPECT_EQ(-EACCES, ret); + } else if (deny_bind) { + /* No listening server. 
*/ + EXPECT_EQ(-ECONNREFUSED, ret); + } else { + EXPECT_EQ(0, ret); + EXPECT_EQ(1, write(connect_fd, ".", 1)); + } + + EXPECT_EQ(0, close(connect_fd)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + /* Accepts connection from the child. */ + client_fd = bind_fd; + if (!deny_bind && !deny_connect) { + if (srv->protocol.type == SOCK_STREAM) { + client_fd = accept(bind_fd, NULL, 0); + ASSERT_LE(0, client_fd); + } + + EXPECT_EQ(1, read(client_fd, &buf, 1)); + EXPECT_EQ('.', buf); + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Closes connection, if any. */ + if (client_fd != bind_fd) + EXPECT_LE(0, close(client_fd)); + + /* Closes listening socket. */ + EXPECT_EQ(0, close(bind_fd)); +} + +TEST_F(protocol, bind) +{ + if (variant->sandbox == TCP_SANDBOX) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr tcp_bind_connect_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + const struct landlock_net_port_attr tcp_connect_p1 = { + .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv1.port, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows connect and bind for the first port. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_p0, 0)); + + /* Allows connect and denies bind for the second port. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_connect_p1, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + /* Binds a socket to the first port. 
*/ + test_bind_and_connect(_metadata, &self->srv0, false, false); + + /* Binds a socket to the second port. */ + test_bind_and_connect(_metadata, &self->srv1, + is_restricted(&variant->prot, variant->sandbox), + false); + + /* Binds a socket to the third port. */ + test_bind_and_connect(_metadata, &self->srv2, + is_restricted(&variant->prot, variant->sandbox), + is_restricted(&variant->prot, variant->sandbox)); +} + +TEST_F(protocol, connect) +{ + if (variant->sandbox == TCP_SANDBOX) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr tcp_bind_connect_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + const struct landlock_net_port_attr tcp_bind_p1 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv1.port, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows connect and bind for the first port. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_p0, 0)); + + /* Allows bind and denies connect for the second port. 
*/ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_p1, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + test_bind_and_connect(_metadata, &self->srv0, false, false); + + test_bind_and_connect(_metadata, &self->srv1, false, + is_restricted(&variant->prot, variant->sandbox)); + + test_bind_and_connect(_metadata, &self->srv2, + is_restricted(&variant->prot, variant->sandbox), + is_restricted(&variant->prot, variant->sandbox)); +} + +TEST_F(protocol, bind_unspec) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP, + }; + const struct landlock_net_port_attr tcp_bind = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv0.port, + }; + int bind_fd, ret; + + if (variant->sandbox == TCP_SANDBOX) { + const int ruleset_fd = landlock_create_ruleset( + &ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows bind. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + + /* Allowed bind on AF_UNSPEC/INADDR_ANY. */ + ret = bind_variant(bind_fd, &self->unspec_any0); + if (variant->prot.domain == AF_INET) { + EXPECT_EQ(0, ret) + { + TH_LOG("Failed to bind to unspec/any socket: %s", + strerror(errno)); + } + } else { + EXPECT_EQ(-EINVAL, ret); + } + EXPECT_EQ(0, close(bind_fd)); + + if (variant->sandbox == TCP_SANDBOX) { + const int ruleset_fd = landlock_create_ruleset( + &ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Denies bind. */ + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + + /* Denied bind on AF_UNSPEC/INADDR_ANY. 
*/ + ret = bind_variant(bind_fd, &self->unspec_any0); + if (variant->prot.domain == AF_INET) { + if (is_restricted(&variant->prot, variant->sandbox)) { + EXPECT_EQ(-EACCES, ret); + } else { + EXPECT_EQ(0, ret); + } + } else { + EXPECT_EQ(-EINVAL, ret); + } + EXPECT_EQ(0, close(bind_fd)); + + /* Checks bind with AF_UNSPEC and the loopback address. */ + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + ret = bind_variant(bind_fd, &self->unspec_srv0); + if (variant->prot.domain == AF_INET) { + EXPECT_EQ(-EAFNOSUPPORT, ret); + } else { + EXPECT_EQ(-EINVAL, ret) + { + TH_LOG("Wrong bind error: %s", strerror(errno)); + } + } + EXPECT_EQ(0, close(bind_fd)); +} + +TEST_F(protocol, connect_unspec) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr tcp_connect = { + .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + int bind_fd, client_fd, status; + pid_t child; + + /* Specific connection tests. */ + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0)); + if (self->srv0.protocol.type == SOCK_STREAM) + EXPECT_EQ(0, listen(bind_fd, backlog)); + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + int connect_fd, ret; + + /* Closes listening socket for the child. */ + EXPECT_EQ(0, close(bind_fd)); + + connect_fd = socket_variant(&self->srv0); + ASSERT_LE(0, connect_fd); + EXPECT_EQ(0, connect_variant(connect_fd, &self->srv0)); + + /* Tries to connect again, or set peer. */ + ret = connect_variant(connect_fd, &self->srv0); + if (self->srv0.protocol.type == SOCK_STREAM) { + EXPECT_EQ(-EISCONN, ret); + } else { + EXPECT_EQ(0, ret); + } + + if (variant->sandbox == TCP_SANDBOX) { + const int ruleset_fd = landlock_create_ruleset( + &ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows connect. 
*/ + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, + LANDLOCK_RULE_NET_PORT, + &tcp_connect, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + /* Disconnects already connected socket, or set peer. */ + ret = connect_variant(connect_fd, &self->unspec_any0); + if (self->srv0.protocol.domain == AF_UNIX && + self->srv0.protocol.type == SOCK_STREAM) { + EXPECT_EQ(-EINVAL, ret); + } else { + EXPECT_EQ(0, ret); + } + + /* Tries to reconnect, or set peer. */ + ret = connect_variant(connect_fd, &self->srv0); + if (self->srv0.protocol.domain == AF_UNIX && + self->srv0.protocol.type == SOCK_STREAM) { + EXPECT_EQ(-EISCONN, ret); + } else { + EXPECT_EQ(0, ret); + } + + if (variant->sandbox == TCP_SANDBOX) { + const int ruleset_fd = landlock_create_ruleset( + &ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Denies connect. */ + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + ret = connect_variant(connect_fd, &self->unspec_any0); + if (self->srv0.protocol.domain == AF_UNIX && + self->srv0.protocol.type == SOCK_STREAM) { + EXPECT_EQ(-EINVAL, ret); + } else { + /* Always allowed to disconnect. */ + EXPECT_EQ(0, ret); + } + + EXPECT_EQ(0, close(connect_fd)); + _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); + return; + } + + client_fd = bind_fd; + if (self->srv0.protocol.type == SOCK_STREAM) { + client_fd = accept(bind_fd, NULL, 0); + ASSERT_LE(0, client_fd); + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Closes connection, if any. */ + if (client_fd != bind_fd) + EXPECT_LE(0, close(client_fd)); + + /* Closes listening socket. 
*/ + EXPECT_EQ(0, close(bind_fd)); +} + +FIXTURE(ipv4) +{ + struct service_fixture srv0, srv1; +}; + +FIXTURE_VARIANT(ipv4) +{ + const enum sandbox_type sandbox; + const int type; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_tcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .type = SOCK_STREAM, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .type = SOCK_STREAM, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .type = SOCK_DGRAM, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .type = SOCK_DGRAM, +}; + +FIXTURE_SETUP(ipv4) +{ + const struct protocol_variant prot = { + .domain = AF_INET, + .type = variant->type, + }; + + disable_caps(_metadata); + + ASSERT_EQ(0, set_service(&self->srv0, prot, 0)); + ASSERT_EQ(0, set_service(&self->srv1, prot, 1)); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(ipv4) +{ +} + +TEST_F(ipv4, from_unix_to_inet) +{ + int unix_stream_fd, unix_dgram_fd; + + if (variant->sandbox == TCP_SANDBOX) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr tcp_bind_connect_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + int ruleset_fd; + + /* Denies connect and bind to check errno value. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows connect and bind for srv0. 
*/ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_p0, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + unix_stream_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + ASSERT_LE(0, unix_stream_fd); + + unix_dgram_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); + ASSERT_LE(0, unix_dgram_fd); + + /* Checks unix stream bind and connect for srv0. */ + EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv0)); + EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv0)); + + /* Checks unix stream bind and connect for srv1. */ + EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv1)) + { + TH_LOG("Wrong bind error: %s", strerror(errno)); + } + EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv1)); + + /* Checks unix datagram bind and connect for srv0. */ + EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv0)); + EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv0)); + + /* Checks unix datagram bind and connect for srv1. 
*/ + EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv1)); + EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv1)); +} + +FIXTURE(tcp_layers) +{ + struct service_fixture srv0, srv1; +}; + +FIXTURE_VARIANT(tcp_layers) +{ + const size_t num_layers; + const int domain; +}; + +FIXTURE_SETUP(tcp_layers) +{ + const struct protocol_variant prot = { + .domain = variant->domain, + .type = SOCK_STREAM, + }; + + disable_caps(_metadata); + + ASSERT_EQ(0, set_service(&self->srv0, prot, 0)); + ASSERT_EQ(0, set_service(&self->srv1, prot, 1)); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(tcp_layers) +{ +} + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv4) { + /* clang-format on */ + .domain = AF_INET, + .num_layers = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv4) { + /* clang-format on */ + .domain = AF_INET, + .num_layers = 1, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv4) { + /* clang-format on */ + .domain = AF_INET, + .num_layers = 2, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv4) { + /* clang-format on */ + .domain = AF_INET, + .num_layers = 3, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv6) { + /* clang-format on */ + .domain = AF_INET6, + .num_layers = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv6) { + /* clang-format on */ + .domain = AF_INET6, + .num_layers = 1, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv6) { + /* clang-format on */ + .domain = AF_INET6, + .num_layers = 2, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv6) { + /* clang-format on */ + .domain = AF_INET6, + .num_layers = 3, +}; + +TEST_F(tcp_layers, ruleset_overlap) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = 
LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr tcp_bind = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv0.port, + }; + const struct landlock_net_port_attr tcp_bind_connect = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + + if (variant->num_layers >= 1) { + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Allows bind. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind, 0)); + /* Also allows bind, but allows connect too. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + if (variant->num_layers >= 2) { + int ruleset_fd; + + /* Creates another ruleset layer. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Only allows bind. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + if (variant->num_layers >= 3) { + int ruleset_fd; + + /* Creates another ruleset layer. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Try to allow bind and connect. */ + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + /* + * Forbids to connect to the socket because only one ruleset layer + * allows connect. 
+ */ + test_bind_and_connect(_metadata, &self->srv0, false, + variant->num_layers >= 2); +} + +TEST_F(tcp_layers, ruleset_expand) +{ + if (variant->num_layers >= 1) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP, + }; + /* Allows bind for srv0. */ + const struct landlock_net_port_attr bind_srv0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv0.port, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &bind_srv0, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + if (variant->num_layers >= 2) { + /* Expands network mask with connect action. */ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + /* Allows bind for srv0 and connect to srv0. */ + const struct landlock_net_port_attr tcp_bind_connect_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = self->srv0.port, + }; + /* Try to allow bind for srv1. 
*/ + const struct landlock_net_port_attr tcp_bind_p1 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv1.port, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_p0, 0)); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_p1, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + if (variant->num_layers >= 3) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + /* Allows connect to srv0, without bind rule. */ + const struct landlock_net_port_attr tcp_bind_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv0.port, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_p0, 0)); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + test_bind_and_connect(_metadata, &self->srv0, false, + variant->num_layers >= 3); + + test_bind_and_connect(_metadata, &self->srv1, variant->num_layers >= 1, + variant->num_layers >= 2); +} + +/* clang-format off */ +FIXTURE(mini) {}; +/* clang-format on */ + +FIXTURE_SETUP(mini) +{ + disable_caps(_metadata); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(mini) +{ +} + +/* clang-format off */ + +#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP + +#define ACCESS_ALL ( \ + LANDLOCK_ACCESS_NET_BIND_TCP | \ + LANDLOCK_ACCESS_NET_CONNECT_TCP) + +/* clang-format on */ + +TEST_F(mini, network_access_rights) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = ACCESS_ALL, + }; + struct landlock_net_port_attr net_port = { + .port = 
sock_port_start, + }; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + for (access = 1; access <= ACCESS_LAST; access <<= 1) { + net_port.allowed_access = access; + EXPECT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0)) + { + TH_LOG("Failed to add rule with access 0x%llx: %s", + access, strerror(errno)); + } + } + EXPECT_EQ(0, close(ruleset_fd)); +} + +/* Checks invalid attribute, out of landlock network access range. */ +TEST_F(mini, unknown_access_rights) +{ + __u64 access_mask; + + for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST; + access_mask >>= 1) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = access_mask, + }; + + EXPECT_EQ(-1, landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0)); + EXPECT_EQ(EINVAL, errno); + } +} + +TEST_F(mini, inval) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP + }; + const struct landlock_net_port_attr tcp_bind_connect = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = sock_port_start, + }; + const struct landlock_net_port_attr tcp_denied = { + .allowed_access = 0, + .port = sock_port_start, + }; + const struct landlock_net_port_attr tcp_bind = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = sock_port_start, + }; + int ruleset_fd; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Checks unhandled allowed_access. */ + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect, 0)); + EXPECT_EQ(EINVAL, errno); + + /* Checks zero access value. */ + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_denied, 0)); + EXPECT_EQ(ENOMSG, errno); + + /* Adds with legitimate values. 
*/ + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind, 0)); +} + +TEST_F(mini, tcp_port_overflow) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr port_max_bind = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = UINT16_MAX, + }; + const struct landlock_net_port_attr port_max_connect = { + .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = UINT16_MAX, + }; + const struct landlock_net_port_attr port_overflow1 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = UINT16_MAX + 1, + }; + const struct landlock_net_port_attr port_overflow2 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = UINT16_MAX + 2, + }; + const struct landlock_net_port_attr port_overflow3 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = UINT32_MAX + 1UL, + }; + const struct landlock_net_port_attr port_overflow4 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = UINT32_MAX + 2UL, + }; + const struct protocol_variant ipv4_tcp = { + .domain = AF_INET, + .type = SOCK_STREAM, + }; + struct service_fixture srv_denied, srv_max_allowed; + int ruleset_fd; + + ASSERT_EQ(0, set_service(&srv_denied, ipv4_tcp, 0)); + + /* Be careful to avoid port inconsistencies. 
*/ + srv_max_allowed = srv_denied; + srv_max_allowed.port = port_max_bind.port; + srv_max_allowed.ipv4_addr.sin_port = htons(port_max_bind.port); + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_max_bind, 0)); + + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_overflow1, 0)); + EXPECT_EQ(EINVAL, errno); + + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_overflow2, 0)); + EXPECT_EQ(EINVAL, errno); + + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_overflow3, 0)); + EXPECT_EQ(EINVAL, errno); + + /* Interleaves with invalid rule additions. */ + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_max_connect, 0)); + + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &port_overflow4, 0)); + EXPECT_EQ(EINVAL, errno); + + enforce_ruleset(_metadata, ruleset_fd); + + test_bind_and_connect(_metadata, &srv_denied, true, true); + test_bind_and_connect(_metadata, &srv_max_allowed, false, false); +} + +FIXTURE(ipv4_tcp) +{ + struct service_fixture srv0, srv1; +}; + +FIXTURE_SETUP(ipv4_tcp) +{ + const struct protocol_variant ipv4_tcp = { + .domain = AF_INET, + .type = SOCK_STREAM, + }; + + disable_caps(_metadata); + + ASSERT_EQ(0, set_service(&self->srv0, ipv4_tcp, 0)); + ASSERT_EQ(0, set_service(&self->srv1, ipv4_tcp, 1)); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(ipv4_tcp) +{ +} + +TEST_F(ipv4_tcp, port_endianness) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + }; + const struct landlock_net_port_attr bind_host_endian_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + /* Host port format. 
*/ + .port = self->srv0.port, + }; + const struct landlock_net_port_attr connect_big_endian_p0 = { + .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP, + /* Big endian port format. */ + .port = htons(self->srv0.port), + }; + const struct landlock_net_port_attr bind_connect_host_endian_p1 = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + /* Host port format. */ + .port = self->srv1.port, + }; + const unsigned int one = 1; + const char little_endian = *(const char *)&one; + int ruleset_fd; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &bind_host_endian_p0, 0)); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &connect_big_endian_p0, 0)); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &bind_connect_host_endian_p1, 0)); + enforce_ruleset(_metadata, ruleset_fd); + + /* No restriction for big endian CPU. */ + test_bind_and_connect(_metadata, &self->srv0, false, little_endian); + + /* No restriction for any CPU. */ + test_bind_and_connect(_metadata, &self->srv1, false, false); +} + +TEST_F(ipv4_tcp, with_fs) +{ + const struct landlock_ruleset_attr ruleset_attr_fs_net = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP, + }; + struct landlock_path_beneath_attr path_beneath = { + .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, + .parent_fd = -1, + }; + struct landlock_net_port_attr tcp_bind = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP, + .port = self->srv0.port, + }; + int ruleset_fd, bind_fd, dir_fd; + + /* Creates ruleset both for filesystem and network access. */ + ruleset_fd = landlock_create_ruleset(&ruleset_attr_fs_net, + sizeof(ruleset_attr_fs_net), 0); + ASSERT_LE(0, ruleset_fd); + + /* Adds a filesystem rule. 
*/ + path_beneath.parent_fd = open("/dev", O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + EXPECT_EQ(0, close(path_beneath.parent_fd)); + + /* Adds a network rule. */ + ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Tests file access. */ + dir_fd = open("/dev", O_RDONLY); + EXPECT_LE(0, dir_fd); + EXPECT_EQ(0, close(dir_fd)); + + dir_fd = open("/", O_RDONLY); + EXPECT_EQ(-1, dir_fd); + EXPECT_EQ(EACCES, errno); + + /* Tests port binding. */ + bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0); + ASSERT_LE(0, bind_fd); + EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0)); + EXPECT_EQ(0, close(bind_fd)); + + bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0); + ASSERT_LE(0, bind_fd); + EXPECT_EQ(-EACCES, bind_variant(bind_fd, &self->srv1)); +} + +FIXTURE(port_specific) +{ + struct service_fixture srv0; +}; + +FIXTURE_VARIANT(port_specific) +{ + const enum sandbox_type sandbox; + const struct protocol_variant prot; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + }, +}; + 
+FIXTURE_SETUP(port_specific) +{ + disable_caps(_metadata); + + ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0)); + + setup_loopback(_metadata); +}; + +FIXTURE_TEARDOWN(port_specific) +{ +} + +TEST_F(port_specific, bind_connect_zero) +{ + int bind_fd, connect_fd, ret; + uint16_t port; + + /* Adds a rule layer with bind and connect actions. */ + if (variant->sandbox == TCP_SANDBOX) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP + }; + const struct landlock_net_port_attr tcp_bind_connect_zero = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = 0, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + /* Checks zero port value on bind and connect actions. */ + EXPECT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_zero, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + + connect_fd = socket_variant(&self->srv0); + ASSERT_LE(0, connect_fd); + + /* Sets address port to 0 for both protocol families. */ + set_port(&self->srv0, 0); + /* + * Binds on port 0, which selects a random port within + * ip_local_port_range. + */ + ret = bind_variant(bind_fd, &self->srv0); + EXPECT_EQ(0, ret); + + EXPECT_EQ(0, listen(bind_fd, backlog)); + + /* Connects on port 0. */ + ret = connect_variant(connect_fd, &self->srv0); + EXPECT_EQ(-ECONNREFUSED, ret); + + /* Sets binded port for both protocol families. */ + port = get_binded_port(bind_fd, &variant->prot); + EXPECT_NE(0, port); + set_port(&self->srv0, port); + /* Connects on the binded port. */ + ret = connect_variant(connect_fd, &self->srv0); + if (is_restricted(&variant->prot, variant->sandbox)) { + /* Denied by Landlock. 
*/ + EXPECT_EQ(-EACCES, ret); + } else { + EXPECT_EQ(0, ret); + } + + EXPECT_EQ(0, close(connect_fd)); + EXPECT_EQ(0, close(bind_fd)); +} + +TEST_F(port_specific, bind_connect_1023) +{ + int bind_fd, connect_fd, ret; + + /* Adds a rule layer with bind and connect actions. */ + if (variant->sandbox == TCP_SANDBOX) { + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP + }; + /* A rule with port value less than 1024. */ + const struct landlock_net_port_attr tcp_bind_connect_low_range = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = 1023, + }; + /* A rule with 1024 port. */ + const struct landlock_net_port_attr tcp_bind_connect = { + .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, + .port = 1024, + }; + int ruleset_fd; + + ruleset_fd = landlock_create_ruleset(&ruleset_attr, + sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect_low_range, 0)); + ASSERT_EQ(0, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &tcp_bind_connect, 0)); + + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + } + + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + + connect_fd = socket_variant(&self->srv0); + ASSERT_LE(0, connect_fd); + + /* Sets address port to 1023 for both protocol families. */ + set_port(&self->srv0, 1023); + /* Binds on port 1023. */ + ret = bind_variant(bind_fd, &self->srv0); + /* Denied by the system. */ + EXPECT_EQ(-EACCES, ret); + + /* Binds on port 1023. */ + set_cap(_metadata, CAP_NET_BIND_SERVICE); + ret = bind_variant(bind_fd, &self->srv0); + clear_cap(_metadata, CAP_NET_BIND_SERVICE); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, listen(bind_fd, backlog)); + + /* Connects on the binded port 1023. 
*/ + ret = connect_variant(connect_fd, &self->srv0); + EXPECT_EQ(0, ret); + + EXPECT_EQ(0, close(connect_fd)); + EXPECT_EQ(0, close(bind_fd)); + + bind_fd = socket_variant(&self->srv0); + ASSERT_LE(0, bind_fd); + + connect_fd = socket_variant(&self->srv0); + ASSERT_LE(0, connect_fd); + + /* Sets address port to 1024 for both protocol families. */ + set_port(&self->srv0, 1024); + /* Binds on port 1024. */ + ret = bind_variant(bind_fd, &self->srv0); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, listen(bind_fd, backlog)); + + /* Connects on the binded port 1024. */ + ret = connect_variant(connect_fd, &self->srv0); + EXPECT_EQ(0, ret); + + EXPECT_EQ(0, close(connect_fd)); + EXPECT_EQ(0, close(bind_fd)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 5d52f64dfb43..7afe05e8c4d7 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -9,7 +9,6 @@ CONFIG_INIT_ON_FREE_DEFAULT_ON=y CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_UBSAN=y CONFIG_UBSAN_BOUNDS=y -CONFIG_UBSAN_TRAP=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_SLUB_DEBUG=y CONFIG_SLUB_DEBUG_ON=y diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 607b8d7e3ea3..368973f05250 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -1,4 +1,5 @@ #PANIC +#PANIC_STOP_IRQOFF Crashes entire system BUG kernel BUG at WARNING WARNING: WARNING_MESSAGE message trigger @@ -7,7 +8,7 @@ EXCEPTION #EXHAUST_STACK Corrupts memory on failure #CORRUPT_STACK Crashes entire system on success #CORRUPT_STACK_STRONG Crashes entire system on success -ARRAY_BOUNDS +ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption STACK_GUARD_PAGE_LEADING diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index cdc9ce4426b9..4ff10ea61461 100644 --- 
a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -18,6 +18,8 @@ mremap_dontunmap mremap_test on-fault-limit transhuge-stress +pagemap_ioctl +*.tmp* protection_keys protection_keys_32 protection_keys_64 @@ -43,3 +45,4 @@ mdwe_test gup_longterm mkdirty va_high_addr_switch +hugetlb_fault_after_madv diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 6a9fc5693145..78dfec8bc676 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -33,7 +33,7 @@ endif MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) -LDLIBS = -lrt -lpthread +LDLIBS = -lrt -lpthread -lm TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test @@ -60,6 +60,7 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += on-fault-limit +TEST_GEN_PROGS += pagemap_ioctl TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress @@ -68,6 +69,7 @@ TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests TEST_GEN_FILES += ksm_functional_tests TEST_GEN_FILES += mdwe_test +TEST_GEN_FILES += hugetlb_fault_after_madv ifneq ($(ARCH),arm64) TEST_GEN_PROGS += soft-dirty diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index be087c4bc396..4309916f629e 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -1,5 +1,6 @@ CONFIG_SYSVIPC=y CONFIG_USERFAULTFD=y +CONFIG_PTE_MARKER_UFFD_WP=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index d33d3e68ffab..ad168d35b23b 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -265,10 +265,11 @@ static void run_with_tmpfile(test_fn fn, const char *desc) fd = fileno(file); if (fd < 0) { 
ksft_test_result_fail("fileno() failed\n"); - return; + goto close; } fn(fd, pagesize); +close: fclose(file); } diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index d55322df4b73..f32d99565c5e 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -36,25 +36,6 @@ unsigned long huge_page_size; unsigned long base_page_size; -unsigned long get_free_hugepages(void) -{ - unsigned long fhp = 0; - char *line = NULL; - size_t linelen = 0; - FILE *f = fopen("/proc/meminfo", "r"); - - if (!f) - return fhp; - while (getline(&line, &linelen, f) > 0) { - if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) - break; - } - - free(line); - fclose(f); - return fhp; -} - void write_fault_pages(void *addr, unsigned long nr_pages) { unsigned long i; diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c new file mode 100644 index 000000000000..73b81c632366 --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <unistd.h> + +#include "vm_util.h" +#include "../kselftest.h" + +#define MMAP_SIZE (1 << 21) +#define INLOOP_ITER 100 + +char *huge_ptr; + +/* Touch the memory while it is being madvised() */ +void *touch(void *unused) +{ + char *ptr = (char *)huge_ptr; + + for (int i = 0; i < INLOOP_ITER; i++) + ptr[0] = '.'; + + return NULL; +} + +void *madv(void *unused) +{ + usleep(rand() % 10); + + for (int i = 0; i < INLOOP_ITER; i++) + madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + + return NULL; +} + +int main(void) +{ + unsigned long free_hugepages; + pthread_t thread1, thread2; + /* + * On kernel 6.4, we are able to reproduce the problem with ~1000 + * iterations + */ + int max = 10000; + + srand(getpid()); + + 
free_hugepages = get_free_hugepages(); + if (free_hugepages != 1) { + ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", + free_hugepages); + } + + while (max--) { + huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, + -1, 0); + + if ((unsigned long)huge_ptr == -1) + ksft_exit_skip("Failed to allocated huge page\n"); + + pthread_create(&thread1, NULL, madv, NULL); + pthread_create(&thread2, NULL, touch, NULL); + + pthread_join(thread1, NULL); + pthread_join(thread2, NULL); + munmap(huge_ptr, MMAP_SIZE); + } + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 901e950f9138..fbff0dd09191 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -26,6 +26,7 @@ #define KiB 1024u #define MiB (1024 * KiB) +#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child" static int mem_fd; static int ksm_fd; @@ -479,6 +480,64 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); } +static int ksm_fork_exec_child(void) +{ + /* Test if KSM is enabled for the process. 
*/ + return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1; +} + +static void test_prctl_fork_exec(void) +{ + int ret, status; + pid_t child_pid; + + ksft_print_msg("[RUN] %s\n", __func__); + + ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); + if (ret < 0 && errno == EINVAL) { + ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + return; + } else if (ret) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + return; + } + + child_pid = fork(); + if (child_pid == -1) { + ksft_test_result_skip("fork() failed\n"); + return; + } else if (child_pid == 0) { + char *prg_name = "./ksm_functional_tests"; + char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME }; + + execv(prg_name, argv_for_program); + return; + } + + if (waitpid(child_pid, &status, 0) > 0) { + if (WIFEXITED(status)) { + status = WEXITSTATUS(status); + if (status) { + ksft_test_result_fail("KSM not enabled\n"); + return; + } + } else { + ksft_test_result_fail("program didn't terminate normally\n"); + return; + } + } else { + ksft_test_result_fail("waitpid() failed\n"); + return; + } + + if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) { + ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n"); + return; + } + + ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); +} + static void test_prctl_unmerge(void) { const unsigned int size = 2 * MiB; @@ -536,9 +595,13 @@ unmap: int main(int argc, char **argv) { - unsigned int tests = 7; + unsigned int tests = 8; int err; + if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { + exit(ksm_fork_exec_child() == 1 ? 
0 : 1); + } + #ifdef __NR_userfaultfd tests++; #endif @@ -576,6 +639,7 @@ int main(int argc, char **argv) test_prctl(); test_prctl_fork(); + test_prctl_fork_exec(); test_prctl_unmerge(); err = ksft_get_fail_cnt(); diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c index bc91bef5d254..200bedcdc32e 100644 --- a/tools/testing/selftests/mm/mdwe_test.c +++ b/tools/testing/selftests/mm/mdwe_test.c @@ -22,15 +22,104 @@ TEST(prctl_flags) { + EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); + EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0); + EXPECT_EQ(errno, EINVAL); EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0); + EXPECT_EQ(errno, EINVAL); +} + +FIXTURE(consecutive_prctl_flags) {}; +FIXTURE_SETUP(consecutive_prctl_flags) {} +FIXTURE_TEARDOWN(consecutive_prctl_flags) {} + +FIXTURE_VARIANT(consecutive_prctl_flags) +{ + unsigned long first_flags; + unsigned long second_flags; + bool should_work; +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags) +{ + .first_flags = 0, + .second_flags = 0, + .should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + 
.should_work = true, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = 0, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = 0, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .should_work = false, +}; + +FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit) +{ + .first_flags = PR_MDWE_REFUSE_EXEC_GAIN, + .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT, + .should_work = false, +}; + +TEST_F(consecutive_prctl_flags, two_prctls) +{ + int ret; + + EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0); + + ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L); + if (variant->should_work) { + EXPECT_EQ(ret, 0); + + ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L); + ASSERT_EQ(ret, variant->second_flags); + } else { + EXPECT_NE(ret, 0); + ASSERT_EQ(errno, EPERM); + } } FIXTURE(mdwe) @@ -45,28 +134,45 @@ FIXTURE_VARIANT(mdwe) { bool enabled; bool forked; + bool inherit; }; FIXTURE_VARIANT_ADD(mdwe, stock) { - .enabled = false, + .enabled = false, .forked = false, + .inherit = false, }; FIXTURE_VARIANT_ADD(mdwe, enabled) { - .enabled = true, + .enabled = true, .forked = false, + .inherit = true, +}; + +FIXTURE_VARIANT_ADD(mdwe, inherited) +{ + .enabled = true, + .forked = true, + .inherit = true, }; -FIXTURE_VARIANT_ADD(mdwe, forked) +FIXTURE_VARIANT_ADD(mdwe, not_inherited) { - .enabled = true, + .enabled = true, .forked = true, + .inherit = false, }; +static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant) +{ + return variant->enabled && (!variant->forked || variant->inherit); +} + FIXTURE_SETUP(mdwe) { + unsigned long 
mdwe_flags; int ret, status; self->p = NULL; @@ -76,13 +182,17 @@ FIXTURE_SETUP(mdwe) if (!variant->enabled) return; - ret = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0L, 0L, 0L); + mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN; + if (!variant->inherit) + mdwe_flags |= PR_MDWE_NO_INHERIT; + + ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L); ASSERT_EQ(ret, 0) { TH_LOG("PR_SET_MDWE failed or unsupported"); } ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L); - ASSERT_EQ(ret, 1); + ASSERT_EQ(ret, mdwe_flags); if (variant->forked) { self->pid = fork(); @@ -113,7 +223,7 @@ TEST_F(mdwe, mmap_READ_EXEC) TEST_F(mdwe, mmap_WRITE_EXEC) { self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_EQ(self->p, MAP_FAILED); } else { EXPECT_NE(self->p, MAP_FAILED); @@ -139,7 +249,7 @@ TEST_F(mdwe, mprotect_add_EXEC) ASSERT_NE(self->p, MAP_FAILED); ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_LT(ret, 0); } else { EXPECT_EQ(ret, 0); @@ -154,7 +264,7 @@ TEST_F(mdwe, mprotect_WRITE_EXEC) ASSERT_NE(self->p, MAP_FAILED); ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC); - if (variant->enabled) { + if (executable_map_should_fail(variant)) { EXPECT_LT(ret, 0); } else { EXPECT_EQ(ret, 0); @@ -168,13 +278,10 @@ TEST_F(mdwe, mmap_FIXED) self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0); ASSERT_NE(self->p, MAP_FAILED); - p = mmap(self->p + self->size, self->size, PROT_READ | PROT_EXEC, + /* MAP_FIXED unmaps the existing page before mapping which is allowed */ + p = mmap(self->p, self->size, PROT_READ | PROT_EXEC, self->flags | MAP_FIXED, 0, 0); - if (variant->enabled) { - EXPECT_EQ(p, MAP_FAILED); - } else { - EXPECT_EQ(p, self->p); - } + EXPECT_EQ(p, self->p); } TEST_F(mdwe, arm64_BTI) diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c 
index ca2359835e75..a06e73ec8568 100644 --- a/tools/testing/selftests/mm/mremap_dontunmap.c +++ b/tools/testing/selftests/mm/mremap_dontunmap.c @@ -7,6 +7,7 @@ */ #define _GNU_SOURCE #include <sys/mman.h> +#include <linux/mman.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5c3773de9f0f..1d4c1589c305 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -23,12 +23,15 @@ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define SIZE_MB(m) ((size_t)m * (1024 * 1024)) +#define SIZE_KB(k) ((size_t)k * 1024) struct config { unsigned long long src_alignment; unsigned long long dest_alignment; unsigned long long region_size; int overlapping; + int dest_preamble_size; }; struct test { @@ -44,6 +47,7 @@ enum { _1MB = 1ULL << 20, _2MB = 2ULL << 20, _4MB = 4ULL << 20, + _5MB = 5ULL << 20, _1GB = 1ULL << 30, _2GB = 2ULL << 30, PMD = _2MB, @@ -146,6 +150,60 @@ static bool is_range_mapped(FILE *maps_fp, void *start, void *end) } /* + * Returns the start address of the mapping on success, else returns + * NULL on failure. + */ +static void *get_source_mapping(struct config c) +{ + unsigned long long addr = 0ULL; + void *src_addr = NULL; + unsigned long long mmap_min_addr; + + mmap_min_addr = get_mmap_min_addr(); + /* + * For some tests, we need to not have any mappings below the + * source mapping. Add some headroom to mmap_min_addr for this. + */ + mmap_min_addr += 10 * _4MB; + +retry: + addr += c.src_alignment; + if (addr < mmap_min_addr) + goto retry; + + src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); + if (src_addr == MAP_FAILED) { + if (errno == EPERM || errno == EEXIST) + goto retry; + goto error; + } + /* + * Check that the address is aligned to the specified alignment. 
+ * Addresses which have alignments that are multiples of that + * specified are not considered valid. For instance, 1GB address is + * 2MB-aligned, however it will not be considered valid for a + * requested alignment of 2MB. This is done to reduce coincidental + * alignment in the tests. + */ + if (((unsigned long long) src_addr & (c.src_alignment - 1)) || + !((unsigned long long) src_addr & c.src_alignment)) { + munmap(src_addr, c.region_size); + goto retry; + } + + if (!src_addr) + goto error; + + return src_addr; +error: + ksft_print_msg("Failed to map source region: %s\n", + strerror(errno)); + return NULL; +} + +/* * This test validates that merge is called when expanding a mapping. * Mapping containing three pages is created, middle page is unmapped * and then the mapping containing the first page is expanded so that @@ -225,59 +283,83 @@ out: } /* - * Returns the start address of the mapping on success, else returns - * NULL on failure. + * Verify that an mremap within a range does not cause corruption + * of unrelated part of range. + * + * Consider the following range which is 2MB aligned and is + * a part of a larger 20MB range which is not shown. Each + * character is 256KB below making the source and destination + * 2MB each. The lower case letters are moved (s to d) and the + * upper case letters are not moved. The below test verifies + * that the upper case S letters are not corrupted by the + * adjacent mremap. 
+ * + * |DDDDddddSSSSssss| */ -static void *get_source_mapping(struct config c) +static void mremap_move_within_range(char pattern_seed) { - unsigned long long addr = 0ULL; - void *src_addr = NULL; - unsigned long long mmap_min_addr; + char *test_name = "mremap mremap move within range"; + void *src, *dest; + int i, success = 1; + + size_t size = SIZE_MB(20); + void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = 0; + goto out; + } + memset(ptr, 0, size); - mmap_min_addr = get_mmap_min_addr(); + src = ptr + SIZE_MB(6); + src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1)); -retry: - addr += c.src_alignment; - if (addr < mmap_min_addr) - goto retry; + /* Set byte pattern for source block. */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(2); i++) { + ((char *)src)[i] = (char) rand(); + } - src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE, - MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, - -1, 0); - if (src_addr == MAP_FAILED) { - if (errno == EPERM || errno == EEXIST) - goto retry; - goto error; + dest = src - SIZE_MB(2); + + void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1), + MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1)); + if (new_ptr == MAP_FAILED) { + perror("mremap"); + success = 0; + goto out; } - /* - * Check that the address is aligned to the specified alignment. - * Addresses which have alignments that are multiples of that - * specified are not considered valid. For instance, 1GB address is - * 2MB-aligned, however it will not be considered valid for a - * requested alignment of 2MB. This is done to reduce coincidental - * alignment in the tests. 
- */ - if (((unsigned long long) src_addr & (c.src_alignment - 1)) || - !((unsigned long long) src_addr & c.src_alignment)) { - munmap(src_addr, c.region_size); - goto retry; + + /* Verify byte pattern after remapping */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(1); i++) { + char c = (char) rand(); + + if (((char *)src)[i] != c) { + ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) src)[i] & 0xff); + success = 0; + } } - if (!src_addr) - goto error; +out: + if (munmap(ptr, size) == -1) + perror("munmap"); - return src_addr; -error: - ksft_print_msg("Failed to map source region: %s\n", - strerror(errno)); - return NULL; + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); } /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { - void *addr, *src_addr, *dest_addr; + void *addr, *src_addr, *dest_addr, *dest_preamble_addr; unsigned long long i; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; @@ -294,7 +376,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, goto out; } - /* Set byte pattern */ + /* Set byte pattern for source block. */ srand(pattern_seed); for (i = 0; i < threshold; i++) memset((char *) src_addr + i, (char) rand(), 1); @@ -306,6 +388,9 @@ static long long remap_region(struct config c, unsigned int threshold_mb, addr = (void *) (((unsigned long long) src_addr + c.region_size + offset) & align_mask); + /* Remap after the destination block preamble. 
*/ + addr += c.dest_preamble_size; + /* See comment in get_source_mapping() */ if (!((unsigned long long) addr & c.dest_alignment)) addr = (void *) ((unsigned long long) addr | c.dest_alignment); @@ -316,11 +401,29 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (addr + c.dest_alignment < addr) { ksft_print_msg("Couldn't find a valid region to remap to\n"); ret = -1; - goto out; + goto clean_up_src; } addr += c.dest_alignment; } + if (c.dest_preamble_size) { + dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size, + PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED, + -1, 0); + if (dest_preamble_addr == MAP_FAILED) { + ksft_print_msg("Failed to map dest preamble region: %s\n", + strerror(errno)); + ret = -1; + goto clean_up_src; + } + + /* Set byte pattern for the dest preamble block. */ + srand(pattern_seed); + for (i = 0; i < c.dest_preamble_size; i++) + memset((char *) dest_preamble_addr + i, (char) rand(), 1); + } + clock_gettime(CLOCK_MONOTONIC, &t_start); dest_addr = mremap(src_addr, c.region_size, c.region_size, MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr); @@ -329,7 +432,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, if (dest_addr == MAP_FAILED) { ksft_print_msg("mremap failed: %s\n", strerror(errno)); ret = -1; - goto clean_up_src; + goto clean_up_dest_preamble; } /* Verify byte pattern after remapping */ @@ -338,7 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char c = (char) rand(); if (((char *) dest_addr)[i] != c) { - ksft_print_msg("Data after remap doesn't match at offset %d\n", + ksft_print_msg("Data after remap doesn't match at offset %llu\n", i); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, ((char *) dest_addr)[i] & 0xff); @@ -347,6 +450,23 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } } + /* Verify the dest preamble byte pattern after remapping */ + 
if (c.dest_preamble_size) { + srand(pattern_seed); + for (i = 0; i < c.dest_preamble_size; i++) { + char c = (char) rand(); + + if (((char *) dest_preamble_addr)[i] != c) { + ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) dest_preamble_addr)[i] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + } + start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec; end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec; ret = end_ns - start_ns; @@ -359,12 +479,92 @@ static long long remap_region(struct config c, unsigned int threshold_mb, */ clean_up_dest: munmap(dest_addr, c.region_size); +clean_up_dest_preamble: + if (c.dest_preamble_size && dest_preamble_addr) + munmap(dest_preamble_addr, c.dest_preamble_size); clean_up_src: munmap(src_addr, c.region_size); out: return ret; } +/* + * Verify that an mremap aligning down does not destroy + * the beginning of the mapping just because the aligned + * down address landed on a mapping that maybe does not exist. + */ +static void mremap_move_1mb_from_start(char pattern_seed) +{ + char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; + void *src = NULL, *dest = NULL; + int i, success = 1; + + /* Config to reuse get_source_mapping() to do an aligned mmap. */ + struct config c = { + .src_alignment = SIZE_MB(1) + SIZE_KB(256), + .region_size = SIZE_MB(6) + }; + + src = get_source_mapping(c); + if (!src) { + success = 0; + goto out; + } + + c.src_alignment = SIZE_MB(1) + SIZE_KB(256); + dest = get_source_mapping(c); + if (!dest) { + success = 0; + goto out; + } + + /* Set byte pattern for source block. */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(2); i++) { + ((char *)src)[i] = (char) rand(); + } + + /* + * Unmap the beginning of dest so that the aligned address + * falls on no mapping. 
+ */ + munmap(dest, SIZE_MB(1)); + + void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1), + MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1)); + if (new_ptr == MAP_FAILED) { + perror("mremap"); + success = 0; + goto out; + } + + /* Verify byte pattern after remapping */ + srand(pattern_seed); + for (i = 0; i < SIZE_MB(1); i++) { + char c = (char) rand(); + + if (((char *)src)[i] != c) { + ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n", + i); + ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + ((char *) src)[i] & 0xff); + success = 0; + } + } + +out: + if (src && munmap(src, c.region_size) == -1) + perror("munmap src"); + + if (dest && munmap(dest, c.region_size) == -1) + perror("munmap dest"); + + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} + static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, unsigned int pattern_seed) @@ -434,7 +634,7 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, return 0; } -#define MAX_TEST 13 +#define MAX_TEST 15 #define MAX_PERF_TEST 3 int main(int argc, char **argv) { @@ -443,7 +643,8 @@ int main(int argc, char **argv) unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; unsigned int pattern_seed; int num_expand_tests = 2; - struct test test_cases[MAX_TEST]; + int num_misc_tests = 2; + struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; time_t t; @@ -500,6 +701,15 @@ int main(int argc, char **argv) test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS, "2GB mremap - Source PUD-aligned, Destination PUD-aligned"); + /* Src and Dest addr 1MB aligned. 5MB mremap. */ + test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned"); + + /* Src and Dest addr 1MB aligned. 5MB mremap. 
*/ + test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS, + "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble"); + test_cases[14].config.dest_preamble_size = 10 * _4MB; + perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS, "1GB mremap - Source PTE-aligned, Destination PTE-aligned"); /* @@ -515,7 +725,7 @@ int main(int argc, char **argv) (threshold_mb * _1MB >= _1GB); ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ? - ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests); + ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests); for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, @@ -533,6 +743,9 @@ int main(int argc, char **argv) fclose(maps_fp); + mremap_move_within_range(pattern_seed); + mremap_move_1mb_from_start(pattern_seed); + if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c new file mode 100644 index 000000000000..befab43719ba --- /dev/null +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -0,0 +1,1661 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <errno.h> +#include <malloc.h> +#include "vm_util.h" +#include "../kselftest.h" +#include <linux/types.h> +#include <linux/memfd.h> +#include <linux/userfaultfd.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <math.h> +#include <asm/unistd.h> +#include <pthread.h> +#include <sys/resource.h> +#include <assert.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \ + PAGE_IS_FILE | PAGE_IS_PRESENT | \ + PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \ + PAGE_IS_HUGE) +#define 
PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_WPALLOWED | PAGE_IS_FILE | \ + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | \ + PAGE_IS_PFNZERO | PAGE_IS_HUGE) + +#define TEST_ITERATIONS 100 +#define PAGEMAP "/proc/self/pagemap" +int pagemap_fd; +int uffd; +int page_size; +int hpage_size; + +#define LEN(region) ((region.end - region.start)/page_size) + +static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag, + int max_pages, long required_mask, long anyof_mask, long excluded_mask, + long return_mask) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + len); + arg.vec = (uintptr_t)vec; + arg.vec_len = vec_len; + arg.flags = flag; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = max_pages; + arg.category_mask = required_mask; + arg.category_anyof_mask = anyof_mask; + arg.category_inverted = excluded_mask; + arg.return_mask = return_mask; + + return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); +} + +static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag, + int max_pages, long required_mask, long anyof_mask, long excluded_mask, + long return_mask, long *walk_end) +{ + struct pm_scan_arg arg; + int ret; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + len); + arg.vec = (uintptr_t)vec; + arg.vec_len = vec_len; + arg.flags = flag; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = max_pages; + arg.category_mask = required_mask; + arg.category_anyof_mask = anyof_mask; + arg.category_inverted = excluded_mask; + arg.return_mask = return_mask; + + ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); + + if (walk_end) + *walk_end = arg.walk_end; + + return ret; +} + + +int init_uffd(void) +{ + struct uffdio_api uffdio_api; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY); + if (uffd == -1) + return uffd; + + uffdio_api.api = UFFD_API; + uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC | + UFFD_FEATURE_WP_HUGETLBFS_SHMEM; + if 
(ioctl(uffd, UFFDIO_API, &uffdio_api)) + return -1; + + if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) || + !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) || + !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) || + !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM)) + return -1; + + return 0; +} + +int wp_init(void *lpBaseAddress, int dwRegionSize) +{ + struct uffdio_register uffdio_register; + struct uffdio_writeprotect wp; + + uffdio_register.range.start = (unsigned long)lpBaseAddress; + uffdio_register.range.len = dwRegionSize; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno)); + + if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT)) + ksft_exit_fail_msg("ioctl set is incorrect\n"); + + wp.range.start = (unsigned long)lpBaseAddress; + wp.range.len = dwRegionSize; + wp.mode = UFFDIO_WRITEPROTECT_MODE_WP; + + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) + ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n"); + + return 0; +} + +int wp_free(void *lpBaseAddress, int dwRegionSize) +{ + struct uffdio_register uffdio_register; + + uffdio_register.range.start = (unsigned long)lpBaseAddress; + uffdio_register.range.len = dwRegionSize; + uffdio_register.mode = UFFDIO_REGISTER_MODE_WP; + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) + ksft_exit_fail_msg("ioctl unregister failure\n"); + return 0; +} + +int wp_addr_range(void *lpBaseAddress, int dwRegionSize) +{ + if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0) + ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno)); + + return 0; +} + +void *gethugetlb_mem(int size, int *shmid) +{ + char *mem; + + if (shmid) { + *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (*shmid < 0) + return NULL; + + mem = shmat(*shmid, 0, 0); + if (mem == (char 
*)-1) { + shmctl(*shmid, IPC_RMID, NULL); + ksft_exit_fail_msg("Shared memory attach failure\n"); + } + } else { + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0); + if (mem == MAP_FAILED) + return NULL; + } + + return mem; +} + +int userfaultfd_tests(void) +{ + int mem_size, vec_size, written, num_pages = 16; + char *mem, *vec; + + mem_size = num_pages * page_size; + mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + + /* Change protection of pages differently */ + mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ); + mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ); + mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE); + mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE); + mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ); + + wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8)); + wp_addr_range(mem, mem_size); + + vec_size = mem_size/page_size; + vec = malloc(sizeof(struct page_region) * vec_size); + + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + free(vec); + return 0; +} + +int get_reads(struct page_region *vec, int vec_size) +{ + int i, sum = 0; + + for (i = 0; i < vec_size; i++) + sum += LEN(vec[i]); + + return sum; +} + +int sanity_tests_sd(void) +{ + int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0; + 
int total_writes, total_reads, reads, count; + struct page_region *vec, *vec2; + char *mem, *m[2]; + long walk_end; + + vec_size = num_pages/2; + mem_size = num_pages * page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + vec2 = malloc(sizeof(struct page_region) * vec_size); + if (!vec2) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + /* 1. wrong operation */ + ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s Zero range size is valid\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0, + "%s output buffer must be specified with size\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s output buffer can be 0\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0, + "%s output buffer can be 0\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, + "%s wrong flag specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0, + "%s flag has extra bits specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0, + "%s no selection mask is specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0, + "%s no return mask is specified\n", __func__); 
+ + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0, + "%s wrong return mask specified\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0, + "%s mixture of correct and wrong flag\n", __func__); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0, + "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", + __func__); + + /* 2. Clear area with larger vec size */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__); + + /* 3. Repeated pattern of written and non-written pages */ + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, + 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == mem_size/(page_size * 2), + "%s Repeated pattern of written and non-written pages\n", __func__); + + /* 4. 
Repeated pattern of written and non-written pages in parts */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret3 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); + + ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, + "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", + __func__, ret, ret3, ret2); + + /* 5. Repeated pattern of written and non-written pages max_pages */ + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + mem[(mem_size/page_size - 1) * page_size]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret == num_pages/2 && ret2 == 1, + "%s Repeated pattern of written and non-written pages max_pages\n", + __func__); + + /* 6. 
only get 2 dirty pages and clear them as well */ + vec_size = mem_size/page_size; + memset(mem, -1, mem_size); + + /* get and clear second and third pages */ + ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == 2 && + vec[0].start == (uintptr_t)(mem + page_size) && + ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem && + LEN(vec2[1]) == vec_size - 3 && + vec2[1].start == (uintptr_t)(mem + 3 * page_size), + "%s only get 2 written pages and clear them as well\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 7. Two regions */ + m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (m[0] == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (m[1] == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(m[0], mem_size); + wp_init(m[1], mem_size); + wp_addr_range(m[0], mem_size); + wp_addr_range(m[1], mem_size); + + memset(m[0], 'a', mem_size); + memset(m[1], 'b', mem_size); + + wp_addr_range(m[0], mem_size); + + ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size, + "%s Two regions\n", __func__); + + wp_free(m[0], mem_size); + wp_free(m[1], mem_size); + munmap(m[0], mem_size); + munmap(m[1], mem_size); + + free(vec); + free(vec2); + + /* 8. 
Smaller vec */ + mem_size = 1050 * page_size; + vec_size = mem_size/(page_size*2); + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + for (i = 0; i < mem_size/page_size; i += 2) + mem[i * page_size]++; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + total_pages += ret; + + ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + total_pages = 0; + + /* 9. 
Smaller vec */ + mem_size = 10000 * page_size; + vec_size = 50; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + for (count = 0; count < TEST_ITERATIONS; count++) { + total_writes = total_reads = 0; + walk_end = (long)mem; + + for (i = 0; i < mem_size; i += page_size) { + if (rand() % 2) { + mem[i]++; + total_writes++; + } + } + + while (total_reads < total_writes) { + ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec, + vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + if (ret > vec_size) + break; + + reads = get_reads(vec, ret); + total_reads += reads; + } + + if (total_reads != total_writes) + break; + } + + ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n"); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 10. 
Walk_end tester */ + vec_size = 1000; + mem_size = vec_size * page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + if (!vec) + ksft_exit_fail_msg("error nomem\n"); + + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioc(mem, 0, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end address\n"); + + ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end with WP\n"); + + ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 0 && walk_end == (long)mem, + "Walk_end: Same start and end with 0 output buffer\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: Big vec\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: vec of minimum length\n"); + + ret = pagemap_ioc(mem, 
mem_size, vec, 1, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2), + "Walk_end: Half max pages\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size), + "Walk_end: 1 max page\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), + "Walk_end: max pages\n"); + + wp_addr_range(mem, mem_size); + for (i = 0; i < mem_size; i += 2 * page_size) + mem[i]++; + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Big vec\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end sparse: vec of minimum length\n"); + + ret = pagemap_ioc(mem, mem_size, vec, 1, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + 
if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_end sparse: Max pages specified\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + "Walk_endsparse : Half max pages\n"); + + ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, + 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), + "Walk_end: 1 max page\n"); + + free(vec); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + return 0; +} + +int base_tests(char *prefix, char *mem, int mem_size, int skip) +{ + int vec_size, written; + struct page_region *vec, *vec2; + + if (skip) { + ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix); + ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix); + ksft_test_result_skip("%s all pages dirty other than first and the last one\n", + prefix); + ksft_test_result_skip("%s 
PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix); + ksft_test_result_skip("%s only middle page dirty\n", prefix); + ksft_test_result_skip("%s only two middle pages dirty\n", prefix); + return 0; + } + + vec_size = mem_size/page_size; + vec = malloc(sizeof(struct page_region) * vec_size); + vec2 = malloc(sizeof(struct page_region) * vec_size); + + /* 1. all new pages must be not be written (dirty) */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix); + + /* 2. all pages must be written */ + memset(mem, -1, mem_size); + + written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size, + "%s all pages must be written (dirty)\n", prefix); + + /* 3. 
all pages dirty other than first and the last one */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + memset(mem + page_size, 0, mem_size - (2 * page_size)); + + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size, + "%s all pages dirty other than first and the last one\n", prefix); + + written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 0, + "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix); + + /* 4. only middle page dirty */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + mem[vec_size/2 * page_size]++; + + written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && LEN(vec[0]) >= 1, + "%s only middle page dirty\n", prefix); + + /* 5. 
only two middle pages dirty and walk over only middle pages */ + written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + mem[vec_size/2 * page_size]++; + mem[(vec_size/2 + 1) * page_size]++; + + written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size]) + && LEN(vec[0]) == 2, + "%s only two middle pages dirty\n", prefix); + + free(vec); + free(vec2); + return 0; +} + +void *gethugepage(int map_size) +{ + int ret; + char *map; + + map = memalign(hpage_size, map_size); + if (!map) + ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno)); + + ret = madvise(map, map_size, MADV_HUGEPAGE); + if (ret) + return NULL; + + memset(map, 0, map_size); + + return map; +} + +int hpage_unit_tests(void) +{ + char *map; + int ret, ret2; + size_t num_pages = 10; + int map_size = hpage_size * num_pages; + int vec_size = map_size/page_size; + struct page_region *vec, *vec2; + + vec = malloc(sizeof(struct page_region) * vec_size); + vec2 = malloc(sizeof(struct page_region) * vec_size); + if (!vec || !vec2) + ksft_exit_fail_msg("malloc failed\n"); + + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + /* 1. 
all new huge page must not be written (dirty) */ + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n", + __func__); + + /* 2. all the huge page must not be written */ + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__); + + /* 3. all the huge page must be written and clear dirty as well */ + memset(map, -1, map_size); + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map && + LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN, + "%s all the huge page must be written and clear\n", __func__); + + /* 4. 
only middle page written */ + wp_free(map, map_size); + free(map); + map = gethugepage(map_size); + wp_init(map, map_size); + wp_addr_range(map, map_size); + map[vec_size/2 * page_size]++; + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) > 0, + "%s only middle page written\n", __func__); + + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s all new huge page must be written\n", __func__); + ksft_test_result_skip("%s all the huge page must not be written\n", __func__); + ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__); + ksft_test_result_skip("%s only middle page written\n", __func__); + } + + /* 5. clear first half of huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, 0, map_size); + + wp_addr_range(map, map_size/2); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 && + vec[0].start == (uintptr_t)(map + map_size/2), + "%s clear first half of huge page\n", __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear first half of huge page\n", __func__); + } + + /* 6. 
clear first half of huge page with limited buffer */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, 0, map_size); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 && + vec[0].start == (uintptr_t)(map + map_size/2), + "%s clear first half of huge page with limited buffer\n", + __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear first half of huge page with limited buffer\n", + __func__); + } + + /* 7. clear second half of huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, -1, map_size); + + ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2, + "%s clear second half huge page\n", __func__); + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s clear second half huge page\n", __func__); + } + + /* 8. 
get half huge page */ + map = gethugepage(map_size); + if (map) { + wp_init(map, map_size); + wp_addr_range(map, map_size); + + memset(map, -1, map_size); + usleep(100); + + ret = pagemap_ioctl(map, map_size, vec, 1, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0, + PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size), + "%s get half huge page\n", __func__); + + ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); + if (ret2 < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + + ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size, + "%s get half huge page\n", __func__); + + wp_free(map, map_size); + free(map); + } else { + ksft_test_result_skip("%s get half huge page\n", __func__); + ksft_test_result_skip("%s get half huge page\n", __func__); + } + + free(vec); + free(vec2); + return 0; +} + +int unmapped_region_tests(void) +{ + void *start = (void *)0x10000000; + int written, len = 0x00040000; + int vec_size = len / page_size; + struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); + + /* 1. 
Get written pages */ + written = pagemap_ioctl(start, len, vec, vec_size, 0, 0, + PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS); + if (written < 0) + ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + + ksft_test_result(written >= 0, "%s Get status of pages\n", __func__); + + free(vec); + return 0; +} + +static void test_simple(void) +{ + int i; + char *map; + struct page_region vec; + + map = aligned_alloc(page_size, page_size); + if (!map) + ksft_exit_fail_msg("aligned_alloc failed\n"); + + wp_init(map, page_size); + wp_addr_range(map, page_size); + + for (i = 0 ; i < TEST_ITERATIONS; i++) { + if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) { + ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i); + break; + } + + wp_addr_range(map, page_size); + /* Write something to the page to get the written bit enabled on the page */ + map[0]++; + + if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) { + ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i); + break; + } + + wp_addr_range(map, page_size); + } + wp_free(map, page_size); + free(map); + + ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__); +} + +int sanity_tests(void) +{ + int mem_size, vec_size, ret, fd, i, buf_size; + struct page_region *vec; + char *mem, *fmem; + struct stat sbuf; + char *tmp_buf; + + /* 1. 
wrong operation */ + mem_size = 10 * page_size; + vec_size = mem_size / page_size; + + vec = malloc(sizeof(struct page_region) * vec_size); + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED || vec == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, + 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0, + "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0, + "%s required_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0, + "%s anyof_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0, + "%s excluded_mask specified\n", __func__); + ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0, + PAGEMAP_BITS_ALL) >= 0, + "%s required_mask and anyof_mask specified\n", __func__); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 2. Get sd and present pages with anyof_mask */ + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get sd and present pages with anyof_mask\n", __func__); + + /* 3. 
Get sd and present pages with required_mask */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get all the pages with required_mask\n", __func__); + + /* 4. Get sd and present pages with required_mask and anyof_mask */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) == + (PAGE_IS_WRITTEN | PAGE_IS_PRESENT), + "%s Get sd and present pages with required_mask and anyof_mask\n", + __func__); + + /* 5. Don't get sd pages */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL); + ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__); + + /* 6. Don't get present pages */ + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, + PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL); + ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 8. 
Find written present pages with return mask */ + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + memset(mem, 0, mem_size); + + ret = pagemap_ioctl(mem, mem_size, vec, vec_size, + PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, + 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN); + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size && + vec[0].categories == PAGE_IS_WRITTEN, + "%s Find written present pages with return mask\n", __func__); + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 9. Memory mapped file */ + fd = open(__FILE__, O_RDONLY); + if (fd < 0) + ksft_exit_fail_msg("%s Memory mapped file\n", __func__); + + ret = stat(__FILE__, &sbuf); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + tmp_buf = malloc(sbuf.st_size); + memcpy(tmp_buf, fmem, sbuf.st_size); + + ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0, + 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS); + + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem && + LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) && + (vec[0].categories & PAGE_IS_FILE), + "%s Memory mapped file\n", __func__); + + munmap(fmem, sbuf.st_size); + close(fd); + + /* 10. 
Create and read/write to a memory mapped file */ + buf_size = page_size * 10; + + fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666); + if (fd < 0) + ksft_exit_fail_msg("Read/write to memory: %s\n", + strerror(errno)); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, buf_size); + wp_addr_range(fmem, buf_size); + + for (i = 0; i < buf_size; i++) + fmem[i] = 'z'; + + msync(fmem, buf_size, MS_SYNC); + + ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0, + PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0, + PAGEMAP_BITS_ALL); + + ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem && + LEN(vec[0]) == (buf_size/page_size) && + (vec[0].categories & PAGE_IS_WRITTEN), + "%s Read/write to memory\n", __func__); + + wp_free(fmem, buf_size); + munmap(fmem, buf_size); + close(fd); + + free(vec); + return 0; +} + +int mprotect_tests(void) +{ + int ret; + char *mem, *mem2; + struct page_region vec; + int pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + + if (pagemap_fd < 0) { + fprintf(stderr, "open() failed\n"); + exit(1); + } + + /* 1. Map two pages */ + mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, 2 * page_size); + wp_addr_range(mem, 2 * page_size); + + /* Populate both pages. */ + memset(mem, 1, 2 * page_size); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__); + + /* 2. 
Start tracking */ + wp_addr_range(mem, 2 * page_size); + + ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, + PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0, + "%s Both pages are not written (dirty)\n", __func__); + + /* 3. Remap the second page */ + mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0); + if (mem2 == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem2, page_size); + wp_addr_range(mem2, page_size); + + /* Protect + unprotect. */ + mprotect(mem, page_size, PROT_NONE); + mprotect(mem, 2 * page_size, PROT_READ); + mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE); + + /* Modify both pages. */ + memset(mem, 2, 2 * page_size); + + /* Protect + unprotect. */ + mprotect(mem, page_size, PROT_NONE); + mprotect(mem, page_size, PROT_READ); + mprotect(mem, page_size, PROT_READ|PROT_WRITE); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, + "%s Both pages written after remap and mprotect\n", __func__); + + /* 4. 
Clear and make the pages written */ + wp_addr_range(mem, 2 * page_size); + + memset(mem, 'A', 2 * page_size); + + ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN, + 0, 0, PAGE_IS_WRITTEN); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + ksft_test_result(ret == 1 && LEN(vec) == 2, + "%s Clear and make the pages written\n", __func__); + + wp_free(mem, 2 * page_size); + munmap(mem, 2 * page_size); + return 0; +} + +/* transact test */ +static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8; +static pthread_barrier_t start_barrier, end_barrier; +static unsigned int extra_thread_faults; +static unsigned int iter_count = 1000; +static volatile int finish; + +static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, + int reset, int page_size) +{ + struct pm_scan_arg arg = {0}; + struct page_region rgns[256]; + int i, j, cnt, ret; + + arg.size = sizeof(struct pm_scan_arg); + arg.start = (uintptr_t)mem; + arg.max_pages = count; + arg.end = (uintptr_t)(mem + count * page_size); + arg.vec = (uintptr_t)rgns; + arg.vec_len = sizeof(rgns) / sizeof(*rgns); + if (reset) + arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC; + arg.category_mask = PAGE_IS_WRITTEN; + arg.return_mask = PAGE_IS_WRITTEN; + + ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); + if (ret < 0) + ksft_exit_fail_msg("ioctl failed\n"); + + cnt = 0; + for (i = 0; i < ret; ++i) { + if (rgns[i].categories != PAGE_IS_WRITTEN) + ksft_exit_fail_msg("wrong flags\n"); + + for (j = 0; j < LEN(rgns[i]); ++j) + cnt++; + } + + return cnt; +} + +void *thread_proc(void *mem) +{ + int *m = mem; + long curr_faults, faults; + struct rusage r; + unsigned int i; + int ret; + + if (getrusage(RUSAGE_THREAD, &r)) + ksft_exit_fail_msg("getrusage\n"); + + curr_faults = r.ru_minflt; + + while (!finish) { + ret = pthread_barrier_wait(&start_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + 
ksft_exit_fail_msg("pthread_barrier_wait\n"); + + for (i = 0; i < access_per_thread; ++i) + __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST); + + ret = pthread_barrier_wait(&end_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + if (getrusage(RUSAGE_THREAD, &r)) + ksft_exit_fail_msg("getrusage\n"); + + faults = r.ru_minflt - curr_faults; + if (faults < access_per_thread) + ksft_exit_fail_msg("faults < access_per_thread"); + + __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread, + __ATOMIC_SEQ_CST); + curr_faults = r.ru_minflt; + } + + return NULL; +} + +static void transact_test(int page_size) +{ + unsigned int i, count, extra_pages; + pthread_t th; + char *mem; + int ret, c; + + if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) + ksft_exit_fail_msg("pthread_barrier_init\n"); + + if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1)) + ksft_exit_fail_msg("pthread_barrier_init\n"); + + mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno)); + + wp_init(mem, 0x1000 * nthreads * pages_per_thread); + wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread); + + memset(mem, 0, 0x1000 * nthreads * pages_per_thread); + + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + ksft_test_result(count > 0, "%s count %d\n", __func__, count); + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + ksft_test_result(count == 0, "%s count %d\n", __func__, count); + + finish = 0; + for (i = 0; i < nthreads; ++i) + pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread); + + extra_pages = 0; + for (i = 0; i < iter_count; ++i) { + count = 0; + + ret = pthread_barrier_wait(&start_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + 
ksft_exit_fail_msg("pthread_barrier_wait\n"); + + count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, + page_size); + + ret = pthread_barrier_wait(&end_barrier); + if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD) + ksft_exit_fail_msg("pthread_barrier_wait\n"); + + if (count > nthreads * access_per_thread) + ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n", + count, nthreads * access_per_thread, i); + + c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); + count += c; + + if (c > nthreads * access_per_thread) { + ksft_test_result_fail(" %s count > nthreads\n", __func__); + return; + } + + if (count != nthreads * access_per_thread) { + /* + * The purpose of the test is to make sure that no page updates are lost + * when the page updates and read-resetting soft dirty flags are performed + * in parallel. However, it is possible that the application will get the + * soft dirty flags twice on the two consecutive read-resets. This seems + * unavoidable as soft dirty flag is handled in software through page faults + * in kernel. While the updating the flags is supposed to be synchronized + * between page fault handling and read-reset, it is possible that + * read-reset happens after page fault PTE update but before the application + * re-executes write instruction. So read-reset gets the flag, clears write + * access and application gets page fault again for the same write. 
+ */ + if (count < nthreads * access_per_thread) { + ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, + nthreads * access_per_thread); + return; + } + + extra_pages += count - nthreads * access_per_thread; + } + } + + pthread_barrier_wait(&start_barrier); + finish = 1; + pthread_barrier_wait(&end_barrier); + + ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, + extra_pages, + 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), + extra_thread_faults); +} + +int main(void) +{ + int mem_size, shmid, buf_size, fd, i, ret; + char *mem, *map, *fmem; + struct stat sbuf; + + ksft_print_header(); + + if (init_uffd()) + return ksft_exit_pass(); + + ksft_set_plan(115); + + page_size = getpagesize(); + hpage_size = read_pmd_pagesize(); + + pagemap_fd = open(PAGEMAP, O_RDONLY); + if (pagemap_fd < 0) + return -EINVAL; + + /* 1. Sanity testing */ + sanity_tests_sd(); + + /* 2. Normal page testing */ + mem_size = 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Page testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 3. Large page testing */ + mem_size = 512 * 10 * page_size; + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (mem == MAP_FAILED) + ksft_exit_fail_msg("error nomem\n"); + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Large Page testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + munmap(mem, mem_size); + + /* 4. 
Huge page testing */ + map = gethugepage(hpage_size); + if (map) { + wp_init(map, hpage_size); + wp_addr_range(map, hpage_size); + base_tests("Huge page testing:", map, hpage_size, 0); + wp_free(map, hpage_size); + free(map); + } else { + base_tests("Huge page testing:", NULL, 0, 1); + } + + /* 5. SHM Hugetlb page testing */ + mem_size = 2*1024*1024; + mem = gethugetlb_mem(mem_size, &shmid); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb shmem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + shmctl(shmid, IPC_RMID, NULL); + } else { + base_tests("Hugetlb shmem testing:", NULL, 0, 1); + } + + /* 6. Hugetlb page testing */ + mem = gethugetlb_mem(mem_size, NULL); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb mem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + } else { + base_tests("Hugetlb mem testing:", NULL, 0, 1); + } + + /* 7. File Hugetlb testing */ + mem_size = 2*1024*1024; + fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL); + mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (mem) { + wp_init(mem, mem_size); + wp_addr_range(mem, mem_size); + + base_tests("Hugetlb shmem testing:", mem, mem_size, 0); + + wp_free(mem, mem_size); + shmctl(shmid, IPC_RMID, NULL); + } else { + base_tests("Hugetlb shmem testing:", NULL, 0, 1); + } + close(fd); + + /* 8. 
File memory testing */ + buf_size = page_size * 10; + + fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777); + if (fd < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n", + strerror(errno)); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + ret = stat(__FILE__".tmp0", &sbuf); + if (ret < 0) + ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + + fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, sbuf.st_size); + wp_addr_range(fmem, sbuf.st_size); + + base_tests("File memory testing:", fmem, sbuf.st_size, 0); + + wp_free(fmem, sbuf.st_size); + munmap(fmem, sbuf.st_size); + close(fd); + + /* 9. File memory testing */ + buf_size = page_size * 10; + + fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL); + if (fd < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n", + strerror(errno)); + + if (ftruncate(fd, buf_size)) + ksft_exit_fail_msg("Error ftruncate\n"); + + for (i = 0; i < buf_size; i++) + if (write(fd, "c", 1) < 0) + ksft_exit_fail_msg("Create and read/write to a memory mapped file\n"); + + fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (fmem == MAP_FAILED) + ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno)); + + wp_init(fmem, buf_size); + wp_addr_range(fmem, buf_size); + + base_tests("File anonymous memory testing:", fmem, buf_size, 0); + + wp_free(fmem, buf_size); + munmap(fmem, buf_size); + close(fd); + + /* 10. Huge page tests */ + hpage_unit_tests(); + + /* 11. Iterative test */ + test_simple(); + + /* 12. Mprotect test */ + mprotect_tests(); + + /* 13. Transact test */ + transact_test(page_size); + + /* 14. Sanity testing */ + sanity_tests(); + + /*15. 
Unmapped address test */ + unmapped_region_tests(); + + /* 16. Userfaultfd tests */ + userfaultfd_tests(); + + close(pagemap_fd); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 92f3be3dd8e5..1af3156a9db8 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -34,7 +34,7 @@ extern int test_nr; extern int iteration_nr; #ifdef __GNUC__ -__attribute__((format(printf, 1, 2))) +__printf(1, 2) #endif static inline void sigsafe_printf(const char *format, ...) { diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3e2bc818d566..00757445278e 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -56,6 +56,8 @@ separated by spaces: memory protection key tests - soft_dirty test soft dirty page bit semantics +- pagemap + test pagemap_scan IOCTL - cow test copy-on-write semantics - thp @@ -221,6 +223,13 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise +nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) +# For this test, we need one and just one huge page +echo 1 > /proc/sys/vm/nr_hugepages +CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv +# Restore the previous number of huge pages, since further tests rely on it +echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages + if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. 
Use" echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" @@ -303,6 +312,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope CATEGORY="memfd_secret" run_test ./memfd_secret # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 @@ -342,6 +352,8 @@ then CATEGORY="soft_dirty" run_test ./soft-dirty fi +CATEGORY="pagemap" run_test ./pagemap_ioctl + # COW tests CATEGORY="cow" run_test ./cow diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 558c9cd8901c..3082b40492dd 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -269,3 +269,22 @@ int uffd_unregister(int uffd, void *addr, uint64_t len) return ret; } + +unsigned long get_free_hugepages(void) +{ + unsigned long fhp = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return fhp; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1) + break; + } + + free(line); + fclose(f); + return fhp; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c7fa61f0dff8..c02990bbd56f 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -51,6 +51,7 @@ int uffd_register(int uffd, void *addr, uint64_t len, int uffd_unregister(int uffd, void *addr, uint64_t len); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); +unsigned long get_free_hugepages(void); /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 61939a695f95..9274edfb76ff 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ 
TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh +TEST_PROGS += netns-name.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -90,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += fq_band_pktlimit.sh TEST_FILES := settings @@ -99,6 +101,7 @@ $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread +$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ # Rules to generate bpf obj nat6to4.o CLANG ?= clang diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 24b21b15ed3f..8d7575389f58 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -45,11 +45,13 @@ struct options { const char *host; const char *service; unsigned int size; + unsigned int num_pkt; struct { unsigned int mark; unsigned int dontfrag; unsigned int tclass; unsigned int hlimit; + unsigned int priority; } sockopt; struct { unsigned int family; @@ -72,6 +74,7 @@ struct options { } v6; } opt = { .size = 13, + .num_pkt = 1, .sock = { .family = AF_UNSPEC, .type = SOCK_DGRAM, @@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[]) cs_usage(argv[0]); } break; - + case 'P': + opt.sockopt.priority = atoi(optarg); + break; case 'm': opt.mark.ena = true; opt.mark.val = atoi(optarg); @@ -146,6 +151,9 @@ static void 
cs_parse_args(int argc, char *argv[]) case 'M': opt.sockopt.mark = atoi(optarg); break; + case 'n': + opt.num_pkt = atoi(optarg); + break; case 'd': opt.txtime.ena = true; opt.txtime.delay = atoi(optarg); @@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + if (opt.sockopt.priority && + setsockopt(fd, SOL_SOCKET, SO_PRIORITY, + &opt.sockopt.priority, sizeof(opt.sockopt.priority))) + error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); } int main(int argc, char *argv[]) @@ -421,6 +433,7 @@ int main(int argc, char *argv[]) char *buf; int err; int fd; + int i; cs_parse_args(argc, argv); @@ -480,24 +493,27 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); - err = sendmsg(fd, &msg, 0); - if (err < 0) { - if (!opt.silent_send) - fprintf(stderr, "send failed: %s\n", strerror(errno)); - err = ERN_SEND; - goto err_out; - } else if (err != (int)opt.size) { - fprintf(stderr, "short send\n"); - err = ERN_SEND_SHORT; - goto err_out; - } else { - err = ERN_SUCCESS; + for (i = 0; i < opt.num_pkt; i++) { + err = sendmsg(fd, &msg, 0); + if (err < 0) { + if (!opt.silent_send) + fprintf(stderr, "send failed: %s\n", strerror(errno)); + err = ERN_SEND; + goto err_out; + } else if (err != (int)opt.size) { + fprintf(stderr, "short send\n"); + err = ERN_SEND_SHORT; + goto err_out; + } } + err = ERN_SUCCESS; - /* Make sure all timestamps have time to loop back */ - usleep(opt.txtime.delay); + if (opt.ts.ena) { + /* Make sure all timestamps have time to loop back */ + usleep(opt.txtime.delay); - cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + } err_out: close(fd); diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index e7d2a530618a..66d0db7a2614 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ 
b/tools/testing/selftests/net/fib_tests.sh @@ -2437,6 +2437,9 @@ ipv4_mpath_list_test() run_cmd "ip -n ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2449,7 +2452,7 @@ ipv4_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2494,7 +2497,7 @@ ipv6_mpath_list_test() # words, the FIB lookup tracepoint needs to be triggered for every # packet. local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) - run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d0c6c499d5da..e4e3e9405056 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -145,14 +145,14 @@ cfg_test_host_common() # Check basic add, replace and delete behavior. 
bridge mdb add dev br0 port br0 grp $grp $state vid 10 - bridge mdb show dev br0 vid 10 | grep -q "$grp" + bridge mdb get dev br0 grp $grp vid 10 &> /dev/null check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null check_fail $? "Managed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 - bridge mdb show dev br0 vid 10 | grep -q "$grp" + bridge mdb get dev br0 grp $grp vid 10 &> /dev/null check_fail $? "Failed to delete $name host entry" # Check error cases. @@ -200,7 +200,7 @@ cfg_test_port_common() # Check basic add, replace and delete behavior. bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 - bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null check_err $? "Failed to add $name entry" bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ @@ -208,31 +208,31 @@ cfg_test_port_common() check_err $? "Failed to replace $name entry" bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 - bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null check_fail $? "Failed to delete $name entry" # Check default protocol and replacement. bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static" check_err $? "$name entry not added with default \"static\" protocol" bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \ proto 123 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123" check_err $? "Failed to replace protocol of $name entry" bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10 # Check behavior when VLAN is not specified. 
bridge mdb add dev br0 port $swp1 $grp_key permanent - bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null check_err $? "$name entry with VLAN 10 not added when VLAN was not specified" - bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 20 &> /dev/null check_err $? "$name entry with VLAN 20 not added when VLAN was not specified" bridge mdb del dev br0 port $swp1 $grp_key permanent - bridge mdb show dev br0 vid 10 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 10 &> /dev/null check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified" - bridge mdb show dev br0 vid 20 | grep -q "$grp_key" + bridge mdb get dev br0 $grp_key vid 20 &> /dev/null check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified" # Check behavior when bridge port is down. @@ -298,21 +298,21 @@ __cfg_test_port_ip_star_g() RET=0 bridge mdb add dev br0 port $swp1 grp $grp vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" check_err $? "Default filter mode is not \"exclude\"" bridge mdb del dev br0 port $swp1 grp $grp vid 10 # Check basic add and delete behavior. bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \ source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null check_err $? "(*, G) entry not created" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null check_err $? "(S, G) entry not created" bridge mdb del dev br0 port $swp1 grp $grp vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src" + bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null check_fail $? 
"(*, G) entry not deleted" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null check_fail $? "(S, G) entry not deleted" ## State (permanent / temp) tests. @@ -321,18 +321,15 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "permanent" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent" check_err $? "(*, G) entry not added as \"permanent\" when should" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \ grep -q "permanent" check_err $? "(S, G) entry not added as \"permanent\" when should" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_err $? "(*, G) \"permanent\" entry has a pending group timer" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" check_err $? "\"permanent\" source entry has a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -342,18 +339,14 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" check_err $? 
"(S, G) \"blocked\" entry not added as \"temp\" when should" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" check_err $? "\"blocked\" source entry has a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -363,18 +356,14 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode include source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" check_err $? "(S, G) entry not added as \"temp\" when should" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00" check_err $? "(*, G) INCLUDE entry has a pending group timer" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "\/0.00" + bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00" check_fail $? "Source entry does not have a pending source timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -383,8 +372,7 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode include source_list $src1 - bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00" check_err $? 
"(S, G) entry has a pending group timer" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -396,11 +384,9 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ filter_mode include source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "include" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include" check_err $? "(*, G) INCLUDE not added with \"include\" filter mode" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "blocked" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" check_fail $? "(S, G) entry marked as \"blocked\" when should not" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -410,11 +396,9 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "exclude" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "blocked" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" check_err $? "(S, G) entry not marked as \"blocked\" when should" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -426,11 +410,9 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp1 grp $grp vid 10 \ filter_mode exclude source_list $src1 proto zebra - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "zebra" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra" check_err $? "(*, G) entry not added with \"zebra\" protocol" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "zebra" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra" check_err $? 
"(S, G) entry not marked added with \"zebra\" protocol" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -443,20 +425,16 @@ __cfg_test_port_ip_star_g() bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "permanent" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent" check_err $? "(*, G) entry not marked as \"permanent\" after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "permanent" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent" check_err $? "(S, G) entry not marked as \"permanent\" after replace" bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp" check_err $? "(*, G) entry not marked as \"temp\" after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "temp" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp" check_err $? "(S, G) entry not marked as \"temp\" after replace" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -467,20 +445,16 @@ __cfg_test_port_ip_star_g() bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode include source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "include" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include" check_err $? "(*, G) not marked with \"include\" filter mode after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "blocked" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" check_fail $? 
"(S, G) marked as \"blocked\" after replace" bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "exclude" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude" check_err $? "(*, G) not marked with \"exclude\" filter mode after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "blocked" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked" check_err $? "(S, G) not marked as \"blocked\" after replace" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -491,20 +465,20 @@ __cfg_test_port_ip_star_g() bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1,$src2,$src3 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null check_err $? "(S, G) entry for source $src1 not created after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null check_err $? "(S, G) entry for source $src2 not created after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null check_err $? "(S, G) entry for source $src3 not created after replace" bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1,$src3 - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null check_err $? "(S, G) entry for source $src1 not created after second replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2" + bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null check_fail $? 
"(S, G) entry for source $src2 created after second replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3" + bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null check_err $? "(S, G) entry for source $src3 not created after second replace" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -515,11 +489,9 @@ __cfg_test_port_ip_star_g() bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \ filter_mode exclude source_list $src1 proto bgp - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \ - grep -q "bgp" + bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp" check_err $? "(*, G) protocol not changed to \"bgp\" after replace" - bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \ - grep -q "bgp" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp" check_err $? "(S, G) protocol not changed to \"bgp\" after replace" bridge mdb del dev br0 port $swp1 grp $grp vid 10 @@ -532,8 +504,8 @@ __cfg_test_port_ip_star_g() bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ filter_mode include source_list $src1 bridge mdb add dev br0 port $swp1 grp $grp vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \ - grep "$src1" | grep -q "added_by_star_ex" + bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \ + grep -q "added_by_star_ex" check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry" bridge mdb del dev br0 port $swp1 grp $grp vid 10 bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10 @@ -606,27 +578,23 @@ __cfg_test_port_ip_sg() RET=0 bridge mdb add dev br0 port $swp1 $grp_key vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include" check_err $? 
"Default filter mode is not \"include\"" bridge mdb del dev br0 port $swp1 $grp_key vid 10 # Check that entries can be added as both permanent and temp and that # group timer is set correctly. bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "permanent" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent" check_err $? "Entry not added as \"permanent\" when should" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" check_err $? "\"permanent\" entry has a pending group timer" bridge mdb del dev br0 port $swp1 $grp_key vid 10 bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "temp" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp" check_err $? "Entry not added as \"temp\" when should" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" check_fail $? "\"temp\" entry has an unpending group timer" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -650,24 +618,19 @@ __cfg_test_port_ip_sg() # Check that we can replace available attributes. bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123 bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111 - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "111" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111" check_err $? "Failed to replace protocol" bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "permanent" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent" check_err $? 
"Entry not marked as \"permanent\" after replace" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" check_err $? "Entry has a pending group timer after replace" bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp - bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "temp" + bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp" check_err $? "Entry not marked as \"temp\" after replace" - bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q " 0.00" + bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00" check_fail $? "Entry has an unpending group timer after replace" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -675,7 +638,7 @@ __cfg_test_port_ip_sg() # (*, G) ports need to be added to it. bridge mdb add dev br0 port $swp2 grp $grp vid 10 bridge mdb add dev br0 port $swp1 $grp_key vid 10 - bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \ + bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \ grep -q "added_by_star_ex" check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -1132,7 +1095,7 @@ ctrl_igmpv3_is_in_test() $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q - bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2 + bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null check_fail $? 
"Permanent entry affected by IGMP packet" # Replace the permanent entry with a temporary one and check that after @@ -1145,12 +1108,10 @@ ctrl_igmpv3_is_in_test() $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q - bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \ - grep -q 192.0.2.2 + bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2 check_err $? "Source not add to source list" - bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \ - grep -q "src 192.0.2.2" + bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null check_err $? "(S, G) entry not created for new source" bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10 @@ -1172,8 +1133,7 @@ ctrl_mldv2_is_in_test() $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ -t ip hop=1,next=0,p="$p" -q - bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ - grep -q 2001:db8:1::2 + bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null check_fail $? "Permanent entry affected by MLD packet" # Replace the permanent entry with a temporary one and check that after @@ -1186,12 +1146,10 @@ ctrl_mldv2_is_in_test() $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ -t ip hop=1,next=0,p="$p" -q - bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \ - grep -q 2001:db8:1::2 + bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2 check_err $? "Source not add to source list" - bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ - grep -q "src 2001:db8:1::2" + bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null check_err $? "(S, G) entry not created for new source" bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10 @@ -1208,8 +1166,8 @@ ctrl_test() ctrl_mldv2_is_in_test } -if ! 
bridge mdb help 2>&1 | grep -q "replace"; then - echo "SKIP: iproute2 too old, missing bridge mdb replace support" +if ! bridge mdb help 2>&1 | grep -q "get"; then + echo "SKIP: iproute2 too old, missing bridge mdb get support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh new file mode 100755 index 000000000000..24b77bdf41ff --- /dev/null +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that FQ has a packet limit per band: +# +# 1. set the limit to 10 per band +# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped +# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped +# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped +# +# Send packets with a 100ms delay to ensure that previously sent +# packets are still queued when later ones are sent. +# Use SO_TXTIME for this. + +die() { + echo "$1" + exit 1 +} + +# run inside private netns +if [[ $# -eq 0 ]]; then + ./in_netns.sh "$0" __subprocess + exit +fi + +ip link add type dummy +ip link set dev dummy0 up +ip -6 addr add fdaa::1/128 dev dummy0 +ip -6 route add fdaa::/64 dev dummy0 +tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Initial stats will report zero sent, as all packets are still +# queued in FQ. Sleep for the delay period (100ms) and see that +# twenty are now sent. 
+sleep 0.1 +OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Log the output after the test +echo "${OUT1}" +echo "${OUT2}" +echo "${OUT3}" +echo "${OUT4}" + +# Test the output for expected values +echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 154287740172..76e604e4810e 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -36,6 +36,8 @@ #include <sys/un.h> #include <sys/wait.h> +#include <io_uring/mini_liburing.h> + #define NOTIF_TAG 0xfffffffULL #define NONZC_TAG 0 #define ZC_TAG 1 @@ -60,272 +62,6 @@ static struct sockaddr_storage cfg_dst_addr; static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); -struct io_sq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - unsigned *flags; - unsigned *array; -}; - -struct io_cq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - struct io_uring_cqe *cqes; -}; - -struct io_uring_sq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *kflags; - unsigned *kdropped; - unsigned *array; - struct io_uring_sqe *sqes; - - unsigned sqe_head; - unsigned sqe_tail; - - size_t ring_sz; -}; - -struct io_uring_cq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *koverflow; - struct io_uring_cqe *cqes; - - size_t ring_sz; -}; - -struct io_uring { - struct io_uring_sq sq; - struct io_uring_cq cq; - int ring_fd; -}; - -#ifdef __alpha__ -# ifndef 
__NR_io_uring_setup -# define __NR_io_uring_setup 535 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 536 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 537 -# endif -#else /* !__alpha__ */ -# ifndef __NR_io_uring_setup -# define __NR_io_uring_setup 425 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 426 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 427 -# endif -#endif - -#if defined(__x86_64) || defined(__i386__) -#define read_barrier() __asm__ __volatile__("":::"memory") -#define write_barrier() __asm__ __volatile__("":::"memory") -#else - -#define read_barrier() __sync_synchronize() -#define write_barrier() __sync_synchronize() -#endif - -static int io_uring_setup(unsigned int entries, struct io_uring_params *p) -{ - return syscall(__NR_io_uring_setup, entries, p); -} - -static int io_uring_enter(int fd, unsigned int to_submit, - unsigned int min_complete, - unsigned int flags, sigset_t *sig) -{ - return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, - flags, sig, _NSIG / 8); -} - -static int io_uring_register_buffers(struct io_uring *ring, - const struct iovec *iovecs, - unsigned nr_iovecs) -{ - int ret; - - ret = syscall(__NR_io_uring_register, ring->ring_fd, - IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); - return (ret < 0) ? 
-errno : ret; -} - -static int io_uring_mmap(int fd, struct io_uring_params *p, - struct io_uring_sq *sq, struct io_uring_cq *cq) -{ - size_t size; - void *ptr; - int ret; - - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); - ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); - if (ptr == MAP_FAILED) - return -errno; - sq->khead = ptr + p->sq_off.head; - sq->ktail = ptr + p->sq_off.tail; - sq->kring_mask = ptr + p->sq_off.ring_mask; - sq->kring_entries = ptr + p->sq_off.ring_entries; - sq->kflags = ptr + p->sq_off.flags; - sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; - - size = p->sq_entries * sizeof(struct io_uring_sqe); - sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); - if (sq->sqes == MAP_FAILED) { - ret = -errno; -err: - munmap(sq->khead, sq->ring_sz); - return ret; - } - - cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); - ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); - if (ptr == MAP_FAILED) { - ret = -errno; - munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); - goto err; - } - cq->khead = ptr + p->cq_off.head; - cq->ktail = ptr + p->cq_off.tail; - cq->kring_mask = ptr + p->cq_off.ring_mask; - cq->kring_entries = ptr + p->cq_off.ring_entries; - cq->koverflow = ptr + p->cq_off.overflow; - cq->cqes = ptr + p->cq_off.cqes; - return 0; -} - -static int io_uring_queue_init(unsigned entries, struct io_uring *ring, - unsigned flags) -{ - struct io_uring_params p; - int fd, ret; - - memset(ring, 0, sizeof(*ring)); - memset(&p, 0, sizeof(p)); - p.flags = flags; - - fd = io_uring_setup(entries, &p); - if (fd < 0) - return fd; - ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); - if (!ret) - ring->ring_fd = fd; - else - close(fd); - return ret; -} - -static int io_uring_submit(struct io_uring *ring) -{ - struct 
io_uring_sq *sq = &ring->sq; - const unsigned mask = *sq->kring_mask; - unsigned ktail, submitted, to_submit; - int ret; - - read_barrier(); - if (*sq->khead != *sq->ktail) { - submitted = *sq->kring_entries; - goto submit; - } - if (sq->sqe_head == sq->sqe_tail) - return 0; - - ktail = *sq->ktail; - to_submit = sq->sqe_tail - sq->sqe_head; - for (submitted = 0; submitted < to_submit; submitted++) { - read_barrier(); - sq->array[ktail++ & mask] = sq->sqe_head++ & mask; - } - if (!submitted) - return 0; - - if (*sq->ktail != ktail) { - write_barrier(); - *sq->ktail = ktail; - write_barrier(); - } -submit: - ret = io_uring_enter(ring->ring_fd, submitted, 0, - IORING_ENTER_GETEVENTS, NULL); - return ret < 0 ? -errno : ret; -} - -static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, - const void *buf, size_t len, int flags) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = (__u8) IORING_OP_SEND; - sqe->fd = sockfd; - sqe->addr = (unsigned long) buf; - sqe->len = len; - sqe->msg_flags = (__u32) flags; -} - -static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, - const void *buf, size_t len, int flags, - unsigned zc_flags) -{ - io_uring_prep_send(sqe, sockfd, buf, len, flags); - sqe->opcode = (__u8) IORING_OP_SEND_ZC; - sqe->ioprio = zc_flags; -} - -static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - - if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) - return NULL; - return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; -} - -static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) -{ - struct io_uring_cq *cq = &ring->cq; - const unsigned mask = *cq->kring_mask; - unsigned head = *cq->khead; - int ret; - - *cqe_ptr = NULL; - do { - read_barrier(); - if (head != *cq->ktail) { - *cqe_ptr = &cq->cqes[head & mask]; - break; - } - ret = io_uring_enter(ring->ring_fd, 0, 1, - IORING_ENTER_GETEVENTS, NULL); - if (ret < 0) - return -errno; - } 
while (1); - - return 0; -} - -static inline void io_uring_cqe_seen(struct io_uring *ring) -{ - *(&ring->cq)->khead += 1; - write_barrier(); -} - static unsigned long gettimeofday_ms(void) { struct timeval tv; diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 85a8ee9395b3..95b498efacd1 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -182,23 +182,6 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - wait_connected() { local listener_ns="${1}" @@ -222,7 +205,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10000 +mptcp_lib_wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" chk_msk_listen 10000 @@ -245,7 +228,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10001 +mptcp_lib_wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ @@ -266,7 +249,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -wait_local_port_listen $ns $((NR_CLIENTS + 10001)) +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 61a2a1988ce6..7898d62fce0b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ 
b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -254,31 +254,6 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 -} - check_mptcp_disabled() { local disabled_ns="ns_disabled-$rndh" @@ -310,12 +285,6 @@ check_mptcp_disabled() return 0 } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_ping() { local listener_ns="$1" @@ -324,7 +293,7 @@ do_ping() local ping_args="-q -c 1" local rc=0 - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi @@ -341,38 +310,6 @@ do_ping() return 0 } -# $1: ns, $2: MIB counter -get_mib_counter() -{ - local listener_ns="${1}" - local mib="${2}" - - # strip the header - ip netns exec "${listener_ns}" \ - nstat -z -a "${mib}" | \ - tail -n+2 | \ - while read a count c rest; do - echo $count - done -} - -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -441,12 +378,12 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local 
stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -454,7 +391,7 @@ do_transfer() $extra_args $local_addr < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) @@ -504,16 +441,16 @@ do_transfer() return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? 
- local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -542,8 +479,8 @@ do_transfer() fi if $checksum; then - local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then @@ -613,9 +550,8 @@ make_file() ksize=$((SIZE / 1024)) rem=$((SIZE - (ksize * 1024))) - dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $ksize + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null echo "Created $name (size $(du -b "$name")) containing data sent by $who" } @@ -635,12 +571,12 @@ run_tests_lo() fi # skip if we don't want v6 - if ! 
$ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" else local_addr="0.0.0.0" @@ -708,7 +644,7 @@ run_test_transparent() TEST_GROUP="${msg}" # skip if we don't want v6 - if ! $ipv6 && is_v6 "${connect_addr}"; then + if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then return 0 fi @@ -716,7 +652,7 @@ run_test_transparent() # the required infrastructure in MPTCP sockopt code. To support TOS, the # following function has been exported (T). Not great but better than # checking for a specific kernel version. - if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then + if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then echo "INFO: ${msg} not supported by the kernel: SKIP" mptcp_lib_result_skip "${TEST_GROUP}" return @@ -741,7 +677,7 @@ EOF fi local local_addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" r6flag="-6" else diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index ee1f89a872b3..8362ea454af3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -511,13 +511,6 @@ get_failed_tests_ids() done | sort -n } -print_file_err() -{ - ls -l "$1" 1>&2 - echo -n "Trailing bytes are: " - tail -c 27 "$1" -} - check_transfer() { local in=$1 @@ -548,8 +541,8 @@ check_transfer() local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then fail_test "$what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" return 1 else @@ -587,49 +580,9 @@ link_failure() done } -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local 
port_hex - port_hex="$(printf "%04X" "${port}")" - - local i - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - -# $1: ns ; $2: counter -get_counter() -{ - local ns="${1}" - local counter="${2}" - local count - - count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}') - if [ -z "${count}" ]; then - mptcp_lib_fail_if_expected_feature "${counter} counter" - return 1 - fi - - echo "${count}" -} - rm_addr_count() { - get_counter "${1}" "MPTcpExtRmAddr" + mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" } # $1: ns, $2: old rm_addr counter in $ns @@ -649,7 +602,7 @@ wait_rm_addr() rm_sf_count() { - get_counter "${1}" "MPTcpExtRmSubflow" + mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow" } # $1: ns, $2: old rm_sf counter in $ns @@ -672,26 +625,20 @@ wait_mpj() local ns="${1}" local cnt old_cnt - old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") local i for i in $(seq 10); do - cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx") + cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx") [ "$cnt" = "${old_cnt}" ] || break sleep 0.1 done } -kill_wait() -{ - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - kill_events_pids() { - kill_wait $evts_ns1_pid - kill_wait $evts_ns2_pid + mptcp_lib_kill_wait $evts_ns1_pid + mptcp_lib_kill_wait $evts_ns2_pid } kill_tests_wait() @@ -901,7 +848,7 @@ pm_nl_set_endpoint() local id=10 while [ $add_nr_ns1 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::1" else addr="10.0.$counter.1" @@ -953,7 +900,7 @@ pm_nl_set_endpoint() local id=20 while [ $add_nr_ns2 -gt 0 ]; do local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:$counter::2" else addr="10.0.$counter.2" @@ -995,7 
+942,7 @@ pm_nl_set_endpoint() pm_nl_flush_endpoint ${connector_ns} elif [ $rm_nr_ns2 -eq 9 ]; then local addr - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then addr="dead:beef:1::2" else addr="10.0.1.2" @@ -1117,7 +1064,7 @@ do_transfer() fi local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then @@ -1199,8 +1146,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size print_info "Test file (size $size KB) for $who" } @@ -1284,7 +1230,7 @@ chk_csum_nr() fi print_check "sum" - count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi @@ -1297,7 +1243,7 @@ chk_csum_nr() print_ok fi print_check "csum" - count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi @@ -1341,7 +1287,7 @@ chk_fail_nr() fi print_check "ftx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi @@ -1355,7 +1301,7 @@ chk_fail_nr() fi print_check "failrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi @@ -1388,7 +1334,7 @@ chk_fclose_nr() fi print_check "ctx" - count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then @@ -1399,7 
+1345,7 @@ chk_fclose_nr() fi print_check "fclzrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then @@ -1429,20 +1375,24 @@ chk_rst_nr() fi print_check "rtx" - count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip - elif [ $count -lt $rst_tx ]; then + # accept more rst than expected except if we don't expect any + elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } || + { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then fail_test "got $count MP_RST[s] TX expected $rst_tx" else print_ok fi print_check "rstrx" - count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip - elif [ "$count" -lt "$rst_rx" ]; then + # accept more rst than expected except if we don't expect any + elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } || + { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then fail_test "got $count MP_RST[s] RX expected $rst_rx" else print_ok @@ -1458,7 +1408,7 @@ chk_infi_nr() local count print_check "itx" - count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_tx" ]; then @@ -1468,7 +1418,7 @@ chk_infi_nr() fi print_check "infirx" - count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$infi_rx" ]; then @@ -1497,7 +1447,7 @@ chk_join_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1508,7 +1458,7 @@ chk_join_nr() print_check "synack" 
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) - count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1525,7 +1475,7 @@ chk_join_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1558,8 +1508,8 @@ chk_stale_nr() print_check "stale" - stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") - recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") + stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale") + recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover") if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then print_skip elif [ $stale_nr -lt $stale_min ] || @@ -1596,7 +1546,7 @@ chk_add_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1608,7 +1558,7 @@ chk_add_nr() fi print_check "echo" - count=$(get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1619,7 +1569,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1629,7 +1579,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ 
-1640,7 +1590,7 @@ chk_add_nr() fi print_check "synack" - count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1651,7 +1601,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1662,7 +1612,7 @@ chk_add_nr() fi print_check "syn" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1673,7 +1623,7 @@ chk_add_nr() fi print_check "ack" - count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1695,7 +1645,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) print_check "add TX" - count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1707,7 +1657,7 @@ chk_add_tx_nr() fi print_check "echo TX" - count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_tx_nr" ]; then @@ -1745,7 +1695,7 @@ chk_rm_nr() fi print_check "rm" - count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_nr" ]; then @@ -1755,18 +1705,21 @@ chk_rm_nr() fi print_check "rmsf" - count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") + 
count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip elif [ -n "$simult" ]; then local cnt suffix - cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow") + cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow") # in case of simult flush, the subflow removal count on each side is # unreliable count=$((count + cnt)) - [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + if [ "$count" != "$rm_subflow_nr" ]; then + suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" + extra_msg="$extra_msg simult" + fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then print_ok "$suffix" @@ -1787,7 +1740,7 @@ chk_rm_tx_nr() local rm_addr_tx_nr=$1 print_check "rm TX" - count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$rm_addr_tx_nr" ]; then @@ -1804,7 +1757,7 @@ chk_prio_nr() local count print_check "ptx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_tx" ]; then @@ -1814,7 +1767,7 @@ chk_prio_nr() fi print_check "prx" - count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mp_prio_nr_rx" ]; then @@ -1860,12 +1813,10 @@ chk_mptcp_info() local cnt2 local dump_stats - print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" + print_check "mptcp_info ${info1:0:15}=$exp1:$exp2" - cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" | - sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q') - cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" | - sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1") + cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" 
"$info2") # 'ss' only display active connections and counters that are not 0. [ -z "$cnt1" ] && cnt1=0 [ -z "$cnt2" ] && cnt2=0 @@ -1883,6 +1834,42 @@ chk_mptcp_info() fi } +# $1: subflows in ns1 ; $2: subflows in ns2 +# number of all subflows, including the initial subflow. +chk_subflows_total() +{ + local cnt1 + local cnt2 + local info="subflows_total" + local dump_stats + + # if subflows_total counter is supported, use it: + if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then + chk_mptcp_info $info $1 $info $2 + return + fi + + print_check "$info $1:$2" + + # if not, count the TCP connections that are in fact MPTCP subflows + cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv | + grep -c tcp-ulp-mptcp) + + if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then + fail_test "got subflows $cnt1:$cnt2 expected $1:$2" + dump_stats=1 + else + print_ok + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -ti + ss -N $ns2 -ti + fi +} + chk_link_usage() { local ns=$1 @@ -1914,7 +1901,7 @@ wait_attempt_fail() while [ $time -lt $timeout_ms ]; do local cnt - cnt=$(get_counter ${ns} "TcpAttemptFails") + cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails") [ "$cnt" = 1 ] && return 1 time=$((time + 100)) @@ -2305,6 +2292,7 @@ remove_tests() chk_join_nr 1 1 1 chk_rm_tx_nr 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # multiple subflows, remove @@ -2317,6 +2305,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_rm_nr 2 2 + chk_rst_nr 0 0 fi # single address, remove @@ -2329,6 +2318,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + chk_rst_nr 0 0 fi # subflow and signal, remove @@ -2342,6 +2332,7 @@ remove_tests() chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # subflows and signal, remove @@ -2356,6 +2347,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + chk_rst_nr 
0 0 fi # addresses remove @@ -2370,6 +2362,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert + chk_rst_nr 0 0 fi # invalid addresses remove @@ -2384,6 +2377,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert + chk_rst_nr 0 0 fi # subflows and signal, flush @@ -2398,6 +2392,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult + chk_rst_nr 0 0 fi # subflows flush @@ -2417,6 +2412,7 @@ remove_tests() else chk_rm_nr 3 3 fi + chk_rst_nr 0 0 fi # addresses flush @@ -2431,6 +2427,7 @@ remove_tests() chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert simult + chk_rst_nr 0 0 fi # invalid addresses flush @@ -2445,6 +2442,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert + chk_rst_nr 0 0 fi # remove id 0 subflow @@ -2456,6 +2454,7 @@ remove_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_rm_nr 1 1 + chk_rst_nr 0 0 fi # remove id 0 address @@ -2468,6 +2467,7 @@ remove_tests() chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert + chk_rst_nr 0 0 invert fi } @@ -2794,6 +2794,7 @@ backup_tests() fi } +SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED @@ -2828,13 +2829,13 @@ verify_listener_events() return fi - type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type "$evt" "$e_type") + family=$(mptcp_lib_evts_get_info family "$evt" "$e_type") + sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type") if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type") else - saddr=$(grep "type:$e_type," $evt 
| sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type") fi if [ $type ] && [ $type = $e_type ] && @@ -3220,7 +3221,7 @@ fastclose_tests() if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + chk_join_nr 0 0 0 0 0 0 1 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi @@ -3229,8 +3230,7 @@ fastclose_tests() pedit_action_pkts() { tc -n $ns2 -j -s action show action pedit index 100 | \ - grep "packets" | \ - sed 's/.*"packets":\([0-9]\+\),.*/\1/' + mptcp_lib_get_info_value \"packets\" packets } fail_tests() @@ -3255,69 +3255,70 @@ fail_tests() fi } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_addr() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3 sleep 1 } -userspace_pm_rm_sf_addr_ns1() +# $1: ns ; $2: id +userspace_pm_rm_addr() { - local addr=$1 - local id=$2 - local tk sp da dp - - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - sp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - da=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') - dp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \ - lport $sp rip $da rport $dp token $tk - wait_rm_addr $ns1 1 - wait_rm_sf $ns1 1 + local evts=$evts_ns1 + local tk + local cnt + + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + + cnt=$(rm_addr_count ${1}) + ip netns exec $1 ./pm_nl_ctl 
rem token $tk id $2 + wait_rm_addr $1 "${cnt}" } +# $1: ns ; $2: addr ; $3: id userspace_pm_add_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 local tk da dp - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \ + [ "$1" == "$ns2" ] && evts=$evts_ns2 + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info daddr4 "$evts") + dp=$(mptcp_lib_evts_get_info dport "$evts") + + ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \ rip $da rport $dp token $tk sleep 1 } -userspace_pm_rm_sf_addr_ns2() +# $1: ns ; $2: addr $3: event type +userspace_pm_rm_sf() { - local addr=$1 - local id=$2 + local evts=$evts_ns1 + local t=${3:-1} + local ip=4 local tk da dp sp + local cnt - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - sp=$(grep "type:10" "$evts_ns2" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id - ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \ + [ "$1" == "$ns2" ] && evts=$evts_ns2 + if mptcp_lib_is_v6 $2; then ip=6; fi + tk=$(mptcp_lib_evts_get_info token "$evts") + da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t) + dp=$(mptcp_lib_evts_get_info dport "$evts" $t) + sp=$(mptcp_lib_evts_get_info sport "$evts" $t) + + cnt=$(rm_sf_count ${1}) + ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \ rip $da rport $dp token $tk - wait_rm_addr $ns2 1 - wait_rm_sf $ns2 1 + wait_rm_sf $1 "${cnt}" } userspace_tests() @@ -3400,18 +3401,21 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - speed=10 \ + speed=5 \ run_tests $ns1 $ns2 10.0.1.1 
& local tests_pid=$! wait_mpj $ns1 - userspace_pm_add_addr 10.0.2.1 10 + userspace_pm_add_addr $ns1 10.0.2.1 10 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 - userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 + userspace_pm_rm_addr $ns1 10 + userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi @@ -3421,16 +3425,88 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=10 \ + speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & local tests_pid=$! wait_mpj $ns2 - userspace_pm_add_sf 10.0.3.2 20 + userspace_pm_add_sf $ns2 10.0.3.2 20 chk_join_nr 1 1 1 chk_mptcp_info subflows 1 subflows 1 - userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 + chk_subflows_total 2 2 + userspace_pm_rm_addr $ns2 20 + userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED chk_rm_nr 1 1 chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + wait $tests_pid + fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + wait $tests_pid + fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 kill_events_pids wait $tests_pid fi diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 92a5befe8039..022262a2cfe0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -207,3 +207,94 @@ mptcp_lib_result_print_all_tap() { printf "%s\n" "${subtest}" done } + +# get the value of keyword $1 in the line marked by keyword $2 +mptcp_lib_get_info_value() { + grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' +} + +# $1: info name ; $2: evts_ns ; $3: event type +mptcp_lib_evts_get_info() { + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}" +} + +# $1: PID +mptcp_lib_kill_wait() { + [ "${1}" -eq 0 ] && return 0 + + kill -SIGUSR1 "${1}" > 
/dev/null 2>&1 + kill "${1}" > /dev/null 2>&1 + wait "${1}" 2>/dev/null +} + +# $1: IP address +mptcp_lib_is_v6() { + [ -z "${1##*:*}" ] +} + +# $1: ns, $2: MIB counter +mptcp_lib_get_counter() { + local ns="${1}" + local counter="${2}" + local count + + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + if [ -z "${count}" ]; then + mptcp_lib_fail_if_expected_feature "${counter} counter" + return 1 + fi + + echo "${count}" +} + +mptcp_lib_make_file() { + local name="${1}" + local bs="${2}" + local size="${3}" + + dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" +} + +# $1: file +mptcp_lib_print_file_err() { + ls -l "${1}" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "${1}" +} + +# $1: input file ; $2: output file ; $3: what kind of file +mptcp_lib_check_transfer() { + local in="${1}" + local out="${2}" + local what="${3}" + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + echo "[ FAIL ] $what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + fi + + return 0 +} + +# $1: ns, $2: port +mptcp_lib_wait_local_port_listen() { + local listener_ns="${1}" + local port="${2}" + + local port_hex + port_hex="$(printf "%04X" "${port}")" + + local _ + for _ in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ + {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 8c8694f21e7d..c643872ddf47 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -11,7 +11,6 @@ cout="" ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) -mptcp_connect="" iptables="iptables" ip6tables="ip6tables" @@ -136,38 +135,6 @@ check_mark() 
return 0 } -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - ret=1 - - return 1 - fi - - return 0 -} - -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - do_transfer() { local listener_ns="$1" @@ -184,7 +151,7 @@ do_transfer() local mptcp_connect="./mptcp_connect -r 20" local local_addr ip - if is_v6 "${connect_addr}"; then + if mptcp_lib_is_v6 "${connect_addr}"; then local_addr="::" ip=ipv6 else @@ -239,7 +206,7 @@ do_transfer() check_mark $connector_ns 4 || retc=1 fi - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? mptcp_lib_result_code "${retc}" "mark ${ip}" @@ -258,8 +225,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size echo "Created $name (size $size KB) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ce9203b817f8..ae8ad5d6fb9d 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -123,23 +123,6 @@ setup() grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { 
local cin=$1 @@ -179,7 +162,7 @@ do_transfer() 0.0.0.0 < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${ns3}" "${port}" + mptcp_lib_wait_local_port_listen "${ns3}" "${port}" timeout ${timeout_test} \ ip netns exec ${ns1} \ diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index b25a3e33eb25..6167837f48e1 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -108,15 +108,6 @@ test_fail() mptcp_lib_result_fail "${test_name}" } -kill_wait() -{ - [ $1 -eq 0 ] && return 0 - - kill -SIGUSR1 $1 > /dev/null 2>&1 - kill $1 > /dev/null 2>&1 - wait $1 2>/dev/null -} - # This function is used in the cleanup trap #shellcheck disable=SC2317 cleanup() @@ -128,7 +119,7 @@ cleanup() for pid in $client4_pid $server4_pid $client6_pid $server6_pid\ $server_evts_pid $client_evts_pid do - kill_wait $pid + mptcp_lib_kill_wait $pid done local netns @@ -173,22 +164,12 @@ print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass -make_file() -{ - # Store a chunk of data in a file to transmit over an MPTCP connection - local name=$1 - local ksize=1 - - dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" -} - make_connection() { if [ -z "$file" ]; then file=$(mktemp) fi - make_file "$file" "client" + mptcp_lib_make_file "$file" 2 1 local is_v6=$1 local app_port=$app4_port @@ -210,7 +191,7 @@ make_connection() fi :>"$client_evts" if [ $client_evts_pid -ne 0 ]; then - kill_wait $client_evts_pid + mptcp_lib_kill_wait $client_evts_pid fi ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 & client_evts_pid=$! @@ -219,7 +200,7 @@ make_connection() fi :>"$server_evts" if [ $server_evts_pid -ne 0 ]; then - kill_wait $server_evts_pid + mptcp_lib_kill_wait $server_evts_pid fi ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 & server_evts_pid=$! 
@@ -247,14 +228,11 @@ make_connection() local server_token local server_serverside - client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") - client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ - "$client_evts") - server_token=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - server_serverside=$(grep "type:1," "$server_evts" | - sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') + client_token=$(mptcp_lib_evts_get_info token "$client_evts") + client_port=$(mptcp_lib_evts_get_info sport "$client_evts") + client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + server_token=$(mptcp_lib_evts_get_info token "$server_evts") + server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && @@ -340,16 +318,16 @@ verify_announce_event() local dport local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) if [ "$e_af" = "v6" ] then - addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + 
id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "addr" "dport" "id" } @@ -367,7 +345,7 @@ test_announce() $client_addr_id dev ns2eth1 > /dev/null 2>&1 local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token" if [ "$type" = "" ] then @@ -446,9 +424,9 @@ verify_remove_event() local token local id - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) check_expected "type" "token" "id" } @@ -466,7 +444,7 @@ test_remove() $client_addr_id > /dev/null 2>&1 print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" local type - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -479,7 +457,7 @@ test_remove() ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + type=$(mptcp_lib_evts_get_info type "$server_evts") if [ "$type" = "" ] then test_pass @@ -583,19 +561,19 @@ verify_subflow_events() fi fi - type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - locid=$(sed 
--unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") - remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt") + type=$(mptcp_lib_evts_get_info type "$evt" $e_type) + token=$(mptcp_lib_evts_get_info token "$evt" $e_type) + family=$(mptcp_lib_evts_get_info family "$evt" $e_type) + dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type) + locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type) + remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type) if [ "$family" = "$AF_INET6" ] then - saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type) else - saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") - daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt") + saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type) + daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type) fi check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid" @@ -627,10 +605,10 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid local sport - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -666,9 +644,9 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from server 
to client machine :>"$server_evts" @@ -705,9 +683,9 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") + sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from server to client machine :>"$server_evts" @@ -743,9 +721,9 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -782,9 +760,9 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW6 from client to server machine :>"$client_evts" @@ -819,9 +797,9 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -865,9 +843,9 @@ test_subflows_v4_v6_mix() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid - sport=$(sed --unbuffered -n 
's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED) # DESTROY_SUBFLOW from client to server machine :>"$client_evts" @@ -896,9 +874,10 @@ test_prio() # Check TX print_test "MP_PRIO TX" - count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass @@ -906,9 +885,10 @@ test_prio() # Check RX print_test "MP_PRIO RX" - count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ $count != 1 ]; then + count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx") + if [ -z "$count" ]; then + test_skip + elif [ $count != 1 ]; then test_fail "Count != 1: ${count}" else test_pass @@ -933,18 +913,13 @@ verify_listener_events() print_test "CLOSE_LISTENER $e_saddr:$e_sport" fi - type=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q') - family=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q') - sport=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + type=$(mptcp_lib_evts_get_info type $evt $e_type) + family=$(mptcp_lib_evts_get_info family $evt $e_type) + sport=$(mptcp_lib_evts_get_info sport $evt $e_type) if [ $family ] && [ $family = $AF_INET6 ]; then - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type) else - saddr=$(grep "type:$e_type," $evt | - sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q') + saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type) fi check_expected "type" "family" "saddr" "sport" @@ -982,7 +957,7 @@ test_listener() sleep 0.5 # 
Delete the listener from the client ns, if one was created - kill_wait $listener_pid + mptcp_lib_kill_wait $listener_pid sleep 0.5 verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh new file mode 100755 index 000000000000..4fe0befa13fb --- /dev/null +++ b/tools/testing/selftests/net/net_helper.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Helper functions + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local port_hex + local i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \ + grep -q "${port_hex}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh new file mode 100755 index 000000000000..7d3d3fc99461 --- /dev/null +++ b/tools/testing/selftests/net/netns-name.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -o pipefail + +NS=netns-name-test +DEV=dummy-dev0 +DEV2=dummy-dev1 +ALT_NAME=some-alt-name + +RET_CODE=0 + +cleanup() { + ip netns del $NS +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +ip netns add $NS + +# +# Test basic move without a rename +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 || + fail "Can't perform a netns move" +ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip link del $DEV || fail + +# +# Test move with a conflict +# +ip link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 2> /dev/null && + fail "Performed a netns move with a name conflict" +ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip 
-netns $NS link del $DEV || fail +ip link del $DEV || fail + +# +# Test move with a conflict and rename +# +ip link add name $DEV type dummy +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link set dev $DEV netns 1 name $DEV2 || + fail "Can't perform a netns move with rename" +ip link del $DEV2 || fail +ip link del $DEV || fail + +# +# Test dup alt-name with netns move +# +ip link add name $DEV type dummy || fail +ip link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link add name $DEV2 type dummy || fail +ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail + +ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null && + fail "Moved with alt-name dup" + +ip link del $DEV || fail +ip -netns $NS link del $DEV2 || fail + +# +# Test creating alt-name in one net-ns and using in another +# +ip -netns $NS link add name $DEV type dummy || fail +ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail +ip -netns $NS link set dev $DEV netns 1 || fail +ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && + fail "Can still find alt-name after move" +ip link del $DEV || fail + +echo -ne "$(basename $0) \t\t\t\t" +if [ $RET_CODE -eq 0 ]; then + echo "[ OK ]" +else + echo "[ FAIL ]" +fi +exit $RET_CODE diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 9c2012d70b08..f8499d4c87f3 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -3,6 +3,8 @@ # # OVS kernel module self tests +trap ovs_exit_sig EXIT TERM INT ERR + # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 @@ -142,6 +144,12 @@ ovs_add_flow () { return 0 } +ovs_del_flows () { + info "Deleting all flows from DP: sbx:$1 br:$2" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2" + return 0 +} + ovs_drop_record_and_run () { local sbx=$1 shift @@ -198,6 +206,17 @@ test_drop_reason() { ip netns exec server ip addr add 172.31.110.20/24 dev s1 ip netns exec server ip link set s1 up + # Check if drop reasons can be sent + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null + if [ $? == 1 ]; then + info "no support for drop reasons - skipping" + ovs_exit_sig + return $ksft_skip + fi + + ovs_del_flows "test_drop_reason" dropreason + # Allow ARP ovs_add_flow "test_drop_reason" dropreason \ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 @@ -525,7 +544,7 @@ run_test() { fi if python3 ovs-dpctl.py -h 2>&1 | \ - grep "Need to install the python" >/dev/null 2>&1; then + grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}" return $ksft_skip fi diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 912dc8c49085..b97e621face9 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -28,8 +28,10 @@ try: from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.generic import GenericNetlinkSocket + import pyroute2 + except ModuleNotFoundError: - print("Need to install the python pyroute2 package.") + print("Need to install the python pyroute2 package >= 0.6.") sys.exit(0) @@ -1117,12 +1119,14 @@ class ovskey(nla): "src", lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ( "dst", "dst", - lambda x: str(ipaddress.IPv6Address(x)), + lambda x: str(ipaddress.IPv4Address(x)), int, + convert_ipv4, ), ("tp_src", "tp_src", "%d", 
int), ("tp_dst", "tp_dst", "%d", int), @@ -1904,6 +1908,32 @@ class OvsFlow(GenericNetlinkSocket): raise ne return reply + def del_flows(self, dpifindex): + """ + Send a del message to the kernel that will drop all flows. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + """ + + flowmsg = OvsFlow.ovs_flow_msg() + flowmsg["cmd"] = OVS_FLOW_CMD_DEL + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -1998,6 +2028,12 @@ def main(argv): nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions + # version check for pyroute2 + prverscheck = pyroute2.__version__.split(".") + if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6: + print("Need to upgrade the python pyroute2 package to >= 0.6.") + sys.exit(0) + parser = argparse.ArgumentParser() parser.add_argument( "-v", @@ -2060,6 +2096,9 @@ def main(argv): addflcmd.add_argument("flow", help="Flow specification") addflcmd.add_argument("acts", help="Flow actions") + delfscmd = subparsers.add_parser("del-flows") + delfscmd.add_argument("flsbr", help="Datapath name") + args = parser.parse_args() if args.verbose > 0: @@ -2143,6 +2182,11 @@ def main(argv): flow = OvsFlow.ovs_flow_msg() flow.parse(args.flow, args.acts, rep["dpifindex"]) ovsflow.add_flow(rep["dpifindex"], flow) + elif hasattr(args, "flsbr"): + rep = ovsdp.info(args.flsbr, 0) + if rep is None: + print("DP '%s' not found." 
% args.flsbr) + ovsflow.del_flows(rep["dpifindex"]) return 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f838dd370f6a..b3b2dc5a630c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -2048,7 +2048,7 @@ run_test() { case $ret in 0) all_skipped=false - [ $exitcode=$ksft_skip ] && exitcode=0 + [ $exitcode -eq $ksft_skip ] && exitcode=0 ;; $ksft_skip) [ $all_skipped = true ] && exitcode=$ksft_skip diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh index 116bfeab72fa..e08701c750e3 100755 --- a/tools/testing/selftests/net/route_localnet.sh +++ b/tools/testing/selftests/net/route_localnet.sh @@ -18,8 +18,10 @@ setup() { ip route del 127.0.0.0/8 dev lo table local ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local - ifconfig veth0 127.25.3.4/24 up - ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up + ip address add 127.25.3.4/24 dev veth0 + ip link set dev veth0 up + ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1 + ip netns exec "${PEER_NS}" ip link set dev veth1 up ip route flush cache ip netns exec "${PEER_NS}" ip route flush cache diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 5f2b3f6c0d74..38be9706c45f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -859,7 +859,7 @@ kci_test_gretap() run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24 - run_cmd ip -netns "$testns" link set dev $DEV_NS ups + run_cmd ip -netns "$testns" link set dev $DEV_NS up run_cmd ip -netns "$testns" link del "$DEV_NS" # test external mode diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 31e5f0f8859d..6e996f8063cd 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ 
b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -337,62 +337,62 @@ basic_common() # Basic add, replace and delete behavior. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" log_test $? 0 "MDB entry addition" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" log_test $? 0 "MDB entry presence after addition" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" log_test $? 0 "MDB entry replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" log_test $? 0 "MDB entry presence after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" log_test $? 0 "MDB entry deletion" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" - log_test $? 1 "MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010" + log_test $? 254 "MDB entry presence after deletion" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" log_test $? 255 "Non-existent MDB entry deletion" # Default protocol and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\"" log_test $? 0 "MDB entry default protocol" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\"" log_test $? 
0 "MDB entry protocol replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" # Default destination port and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \"" log_test $? 1 "MDB entry default destination port" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\"" log_test $? 0 "MDB entry destination port replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" # Default destination VNI and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \"" log_test $? 1 "MDB entry default destination VNI" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\"" log_test $? 0 "MDB entry destination VNI replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" # Default outgoing interface and replacement. 
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \"" log_test $? 1 "MDB entry default outgoing interface" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\"" log_test $? 0 "MDB entry outgoing interface replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" @@ -550,127 +550,127 @@ star_g_common() # Basic add, replace and delete behavior. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" log_test $? 0 "(*, G) MDB entry addition with source list" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" log_test $? 0 "(*, G) MDB entry presence after addition" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 0 "(S, G) MDB entry presence after addition" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" log_test $? 0 "(*, G) MDB entry replacement with source list" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" log_test $? 
0 "(*, G) MDB entry presence after replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 0 "(S, G) MDB entry presence after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" log_test $? 0 "(*, G) MDB entry deletion" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" - log_test $? 1 "(*, G) MDB entry presence after deletion" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" - log_test $? 1 "(S, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010" + log_test $? 254 "(*, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" + log_test $? 254 "(S, G) MDB entry presence after deletion" # Default filter mode and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude" log_test $? 0 "(*, G) MDB entry default filter mode" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include" log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\"" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 
0 "(S, G) MDB entry after replacing filter mode to \"include\"" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked" log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\"" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude" log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\"" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\"" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked" log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\"" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" # Default source list and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list" log_test $? 
1 "(*, G) MDB entry default source list" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010" log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010" log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010" log_test $? 0 "(S, G) MDB entry of 1st source after removing source" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" - log_test $? 1 "(S, G) MDB entry of 2nd source after removing source" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010" + log_test $? 254 "(S, G) MDB entry of 2nd source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010" log_test $? 0 "(S, G) MDB entry of 3rd source after removing source" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" # Default protocol and replacement. 
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\"" log_test $? 0 "(*, G) MDB entry default protocol" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\"" log_test $? 0 "(S, G) MDB entry default protocol" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\"" log_test $? 0 "(*, G) MDB entry protocol after replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\"" log_test $? 0 "(S, G) MDB entry protocol after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" # Default destination port and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \"" log_test $? 
1 "(*, G) MDB entry default destination port" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \"" log_test $? 1 "(S, G) MDB entry default destination port" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \"" log_test $? 0 "(*, G) MDB entry destination port after replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \"" log_test $? 0 "(S, G) MDB entry destination port after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" # Default destination VNI and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \"" log_test $? 1 "(*, G) MDB entry default destination VNI" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \"" log_test $? 
1 "(S, G) MDB entry default destination VNI" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \"" log_test $? 0 "(*, G) MDB entry destination VNI after replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \"" log_test $? 0 "(S, G) MDB entry destination VNI after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" # Default outgoing interface and replacement. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \"" log_test $? 1 "(*, G) MDB entry default outgoing interface" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \"" log_test $? 1 "(S, G) MDB entry default outgoing interface" run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \"" log_test $? 
0 "(*, G) MDB entry outgoing interface after replacement" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \"" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \"" log_test $? 0 "(S, G) MDB entry outgoing interface after replacement" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" @@ -772,7 +772,7 @@ sg_common() # Default filter mode. run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" - run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include" log_test $? 0 "(S, G) MDB entry default filter mode" run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" @@ -2296,9 +2296,9 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi -bridge mdb help 2>&1 | grep -q "src_vni" +bridge mdb help 2>&1 | grep -q "get" if [ $? -ne 0 ]; then - echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support" + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 0c743752669a..af5dc57c8ce9 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro functional tests. +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -51,8 +53,7 @@ run_one() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? 
wait $(jobs -p) @@ -97,7 +98,7 @@ run_one_nat() { echo "ok" || \ echo "failed"& - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? kill -INT $pid @@ -118,11 +119,9 @@ run_one_2sock() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 - sleep 0.1 - # first UDP GSO socket should be closed at this point + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 894972877e8b..cb664679b434 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -40,8 +42,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 0a6359bed0b9..dd47fa96f6b3 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="../bpf/xdp_dummy.bpf.o" @@ -45,8 +47,7 @@ run_one() { echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/netfilter/Makefile 
b/tools/testing/selftests/netfilter/Makefile index ef90aca4cc96..bced422b78f7 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -7,7 +7,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh + conntrack_sctp_collision.sh xt_string.sh HOSTPKG_CONFIG := pkg-config diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh index bb34329e02a7..99ed5bd6e840 100755 --- a/tools/testing/selftests/netfilter/nft_audit.sh +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || { exit $SKIP_RC } +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +# give other scripts a chance to finish - audit_logread sees all activity +sleep 1 + logfile=$(mktemp) rulefile=$(mktemp) echo "logging into $logfile" @@ -93,6 +99,12 @@ do_test 'nft add counter t1 c1' \ do_test 'nft add counter t2 c1; add counter t2 c2' \ 'table=t2 family=2 entries=2 op=nft_register_obj' +for ((i = 3; i <= 500; i++)); do + echo "add counter t2 c$i" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + # adding/updating quotas do_test 'nft add quota t1 q1 { 10 bytes }' \ @@ -101,6 +113,12 @@ do_test 'nft add quota t1 q1 { 10 bytes }' \ do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ 'table=t2 family=2 entries=2 op=nft_register_obj' +for ((i = 3; i <= 500; i++)); do + echo "add quota t2 q$i { 10 bytes }" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=498 op=nft_register_obj' + # changing the quota value triggers obj update path do_test 'nft add quota t1 q1 { 20 bytes }' \ 'table=t1 family=2 entries=1 
op=nft_register_obj' @@ -150,6 +168,40 @@ done do_test 'nft reset set t1 s' \ 'table=t1 family=2 entries=3 op=nft_reset_setelem' +# resetting counters + +do_test 'nft reset counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset counters t2' \ +'table=t2 family=2 entries=342 op=nft_reset_obj +table=t2 family=2 entries=158 op=nft_reset_obj' + +do_test 'nft reset counters' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=341 op=nft_reset_obj +table=t2 family=2 entries=159 op=nft_reset_obj' + +# resetting quotas + +do_test 'nft reset quota t1 q1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t1' \ +'table=t1 family=2 entries=1 op=nft_reset_obj' + +do_test 'nft reset quotas t2' \ +'table=t2 family=2 entries=315 op=nft_reset_obj +table=t2 family=2 entries=185 op=nft_reset_obj' + +do_test 'nft reset quotas' \ +'table=t1 family=2 entries=1 op=nft_reset_obj +table=t2 family=2 entries=314 op=nft_reset_obj +table=t2 family=2 entries=186 op=nft_reset_obj' + # deleting rules readarray -t handles < <(nft -a list chain t1 c1 | \ diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh new file mode 100755 index 000000000000..1802653a4728 --- /dev/null +++ b/tools/testing/selftests/netfilter/xt_string.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 +rc=0 + +if ! iptables --version >/dev/null 2>&1; then + echo "SKIP: Test needs iptables" + exit $ksft_skip +fi +if ! ip -V >/dev/null 2>&1; then + echo "SKIP: Test needs iproute2" + exit $ksft_skip +fi +if ! 
nc -h >/dev/null 2>&1; then + echo "SKIP: Test needs netcat" + exit $ksft_skip +fi + +pattern="foo bar baz" +patlen=11 +hdrlen=$((20 + 8)) # IPv4 + UDP +ns="ns-$(mktemp -u XXXXXXXX)" +trap 'ip netns del $ns' EXIT +ip netns add "$ns" +ip -net "$ns" link add d0 type dummy +ip -net "$ns" link set d0 up +ip -net "$ns" addr add 10.1.2.1/24 dev d0 + +#ip netns exec "$ns" tcpdump -npXi d0 & +#tcpdump_pid=$! +#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT + +add_rule() { # (alg, from, to) + ip netns exec "$ns" \ + iptables -A OUTPUT -o d0 -m string \ + --string "$pattern" --algo $1 --from $2 --to $3 +} +showrules() { # () + ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A' +} +zerorules() { + ip netns exec "$ns" iptables -Z OUTPUT +} +countrule() { # (pattern) + showrules | grep -c -- "$*" +} +send() { # (offset) + ( for ((i = 0; i < $1 - $hdrlen; i++)); do + printf " " + done + printf "$pattern" + ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374 +} + +add_rule bm 1000 1500 +add_rule bm 1400 1600 +add_rule kmp 1000 1500 +add_rule kmp 1400 1600 + +zerorules +send 0 +send $((1000 - $patlen)) +if [ $(countrule -c 0 0) -ne 4 ]; then + echo "FAIL: rules match data before --from" + showrules + ((rc--)) +fi + +zerorules +send 1000 +send $((1400 - $patlen)) +if [ $(countrule -c 2) -ne 2 ]; then + echo "FAIL: only two rules should match at low offset" + showrules + ((rc--)) +fi + +zerorules +send $((1500 - $patlen)) +if [ $(countrule -c 1) -ne 4 ]; then + echo "FAIL: all rules should match at end of packet" + showrules + ((rc--)) +fi + +zerorules +send 1495 +if [ $(countrule -c 1) -ne 1 ]; then + echo "FAIL: only kmp with proper --to should match pattern spanning fragments" + showrules + ((rc--)) +fi + +zerorules +send 1500 +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match pattern at start of second fragment" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen)) +if [ $(countrule -c 1) -ne 2 ]; then + echo "FAIL: two rules should match 
pattern at end of largest --to" + showrules + ((rc--)) +fi + +zerorules +send $((1600 - $patlen + 1)) +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rules should match pattern extending largest --to" + showrules + ((rc--)) +fi + +zerorules +send 1600 +if [ $(countrule -c 1) -ne 0 ]; then + echo "FAIL: no rule should match pattern past largest --to" + showrules + ((rc--)) +fi + +exit $rc diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore index 52f613cdad54..5119f9f7afd2 100644 --- a/tools/testing/selftests/nolibc/.gitignore +++ b/tools/testing/selftests/nolibc/.gitignore @@ -1,4 +1,5 @@ /initramfs/ +/initramfs.cpio /libc-test /nolibc-test /run.out diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index dfe66776a331..a0fc07253baf 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -82,7 +82,7 @@ QEMU_ARCH_arm = arm QEMU_ARCH_mips = mipsel # works with malta_defconfig QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 -QEMU_ARCH_ppc64le = ppc64le +QEMU_ARCH_ppc64le = ppc64 QEMU_ARCH_riscv = riscv64 QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 @@ -113,6 +113,7 @@ else Q=@ endif +CFLAGS_i386 = $(call cc-option,-m32) CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) @@ -131,18 +132,20 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ help: @echo "Supported targets under selftests/nolibc:" - @echo " all call the \"run\" target below" - @echo " help this help" - @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" - @echo " libc-test build an executable using the compiler's default libc instead" - @echo " run-user runs the executable 
under QEMU (uses \$$XARCH, \$$TEST)" - @echo " initramfs prepare the initramfs with nolibc-test" - @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel with the initramfs (uses \$$XARCH)" - @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" - @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" - @echo " clean clean the sysroot, initramfs, build and output files" + @echo " all call the \"run\" target below" + @echo " help this help" + @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" + @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " libc-test build an executable using the compiler's default libc instead" + @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" + @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" + @echo " initramfs prepare the initramfs tree with nolibc-test" + @echo " defconfig create a fresh new default config (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" + @echo " clean clean the sysroot, initramfs, build and output files" @echo "" @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." 
@echo "" @@ -168,17 +171,17 @@ sysroot/$(ARCH)/include: $(Q)mv sysroot/sysroot sysroot/$(ARCH) ifneq ($(NOLIBC_SYSROOT),0) -nolibc-test: nolibc-test.c sysroot/$(ARCH)/include +nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc + -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc else -nolibc-test: nolibc-test.c +nolibc-test: nolibc-test.c nolibc-test-linkage.c $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include ../../../include/nolibc/nolibc.h $< -lgcc + -nostdlib -static -include ../../../include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc endif -libc-test: nolibc-test.c - $(QUIET_CC)$(HOSTCC) -o $@ $< +libc-test: nolibc-test.c nolibc-test-linkage.c + $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c # local libc-test run-libc-test: libc-test @@ -195,6 +198,9 @@ run-user: nolibc-test $(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || : $(Q)$(REPORT) $(CURDIR)/run.out +initramfs.cpio: kernel nolibc-test + $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(srctree)/usr/gen_init_cpio - > initramfs.cpio + initramfs: nolibc-test $(QUIET_MKDIR)mkdir -p initramfs $(call QUIET_INSTALL, initramfs/init) @@ -203,17 +209,20 @@ initramfs: nolibc-test defconfig: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare -kernel: initramfs +kernel: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) + +kernel-standalone: initramfs $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs # run the tests after building the kernel -run: kernel - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" +run: 
kernel initramfs.cpio + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # re-run the tests from an existing kernel rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # report with existing test log @@ -227,6 +236,8 @@ clean: $(Q)rm -f nolibc-test $(call QUIET_CLEAN, libc-test) $(Q)rm -f libc-test + $(call QUIET_CLEAN, initramfs.cpio) + $(Q)rm -rf initramfs.cpio $(call QUIET_CLEAN, initramfs) $(Q)rm -rf initramfs $(call QUIET_CLEAN, run.out) diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c new file mode 100644 index 000000000000..5ff4c8a1db2a --- /dev/null +++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "nolibc-test-linkage.h" + +#ifndef NOLIBC +#include <errno.h> +#endif + +void *linkage_test_errno_addr(void) +{ + return &errno; +} + +int linkage_test_constructor_test_value; + +__attribute__((constructor)) +static void constructor1(void) +{ + linkage_test_constructor_test_value = 2; +} + +__attribute__((constructor)) +static void constructor2(void) +{ + linkage_test_constructor_test_value *= 3; +} diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.h b/tools/testing/selftests/nolibc/nolibc-test-linkage.h new file mode 100644 index 000000000000..c66473070d73 --- /dev/null +++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _NOLIBC_TEST_LINKAGE_H +#define _NOLIBC_TEST_LINKAGE_H + +void *linkage_test_errno_addr(void); 
+extern int linkage_test_constructor_test_value; + +#endif /* _NOLIBC_TEST_LINKAGE_H */ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index fb3bf91462e2..2f10541e6f38 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -41,6 +41,8 @@ #endif #endif +#include "nolibc-test-linkage.h" + /* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */ #define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2))) #define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1) @@ -57,6 +59,9 @@ static int test_argc; /* will be used by some test cases as readable file, please don't write it */ static const char *argv0; +/* will be used by constructor tests */ +static int constructor_test_value; + /* definition of a series of tests */ struct test { const char *name; /* test name */ @@ -594,6 +599,19 @@ int expect_strne(const char *expr, int llen, const char *cmp) #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); +/* constructors validate that they are executed in definition order */ +__attribute__((constructor)) +static void constructor1(void) +{ + constructor_test_value = 1; +} + +__attribute__((constructor)) +static void constructor2(void) +{ + constructor_test_value *= 2; +} + int run_startup(int min, int max) { int test; @@ -630,7 +648,9 @@ int run_startup(int min, int max) CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break; CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break; CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break; - CASE_TEST(auxv_AT_PAGESZ); EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break; + CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break; + CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break; + 
CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -894,14 +914,14 @@ int run_syscall(int min, int max) CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break; CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break; CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; - CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break; + CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break; CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break; CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; - CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break; + CASE_TEST(poll_fault); EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break; CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break; CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break; @@ -910,7 +930,7 @@ int run_syscall(int min, int max) CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break; CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break; CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, 
ENOENT); break; - CASE_TEST(stat_fault); EXPECT_SYSER(1, stat((void *)1, &stat_buf), -1, EFAULT); break; + CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 7fb902099de4..9024754530b2 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -300,7 +300,7 @@ void test_openat2_flags(void) ksft_print_msg("openat2 unexpectedly returned "); if (fdpath) - ksft_print_msg("%d['%s'] with %X (!= %X)\n", + ksft_print_msg("%d['%s'] with %X (!= %llX)\n", fd, fdpath, fdflags, test->how.flags); else diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 4e86f927880c..01cc37bf611c 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -62,7 +62,7 @@ static void error_report(struct error *err, const char *test_name) break; case PIDFD_PASS: - ksft_test_result_pass("%s test: Passed\n"); + ksft_test_result_pass("%s test: Passed\n", test_name); break; default: diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 00a07e7c571c..c081ae91313a 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -381,13 +381,13 @@ static int test_pidfd_send_signal_syscall_support(void) static void *test_pidfd_poll_exec_thread(void *priv) { - ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n", + ksft_print_msg("Child Thread: starting. 
pid %d tid %ld ; and sleeping\n", getpid(), syscall(SYS_gettid)); ksft_print_msg("Child Thread: doing exec of sleep\n"); execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL); - ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", + ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); return NULL; } @@ -427,7 +427,7 @@ static int child_poll_exec_test(void *args) { pthread_t t1; - ksft_print_msg("Child (pidfd): starting. pid %d tid %d\n", getpid(), + ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL); /* @@ -480,10 +480,10 @@ static void test_pidfd_poll_exec(int use_waitpid) static void *test_pidfd_poll_leader_exit_thread(void *priv) { - ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n", + ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n", getpid(), syscall(SYS_gettid)); sleep(CHILD_THREAD_MIN_WAIT); - ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", getpid(), syscall(SYS_gettid)); + ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); return NULL; } @@ -492,7 +492,7 @@ static int child_poll_leader_exit_test(void *args) { pthread_t t1, t2; - ksft_print_msg("Child: starting. pid %d tid %d\n", getpid(), syscall(SYS_gettid)); + ksft_print_msg("Child: starting. 
pid %d tid %ld\n", getpid(), syscall(SYS_gettid)); pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL); pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL); diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index ee71ce52cb6a..56198d4ca2bf 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -23,6 +23,9 @@ * /proc/${pid}/smaps * /proc/${pid}/smaps_rollup */ +#undef _GNU_SOURCE +#define _GNU_SOURCE + #undef NDEBUG #include <assert.h> #include <errno.h> @@ -34,6 +37,7 @@ #include <sys/mman.h> #include <sys/ptrace.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> @@ -42,6 +46,43 @@ #define TEST_VSYSCALL #endif +#if defined __amd64__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 330 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 331 + #endif +#elif defined __i386__ + #ifndef SYS_pkey_alloc + #define SYS_pkey_alloc 381 + #endif + #ifndef SYS_pkey_free + #define SYS_pkey_free 382 + #endif +#else + #error "SYS_pkey_alloc" +#endif + +static int g_protection_key_support; + +static int protection_key_support(void) +{ + long rv = syscall(SYS_pkey_alloc, 0, 0); + if (rv > 0) { + syscall(SYS_pkey_free, (int)rv); + return 1; + } else if (rv == -1 && errno == ENOSYS) { + return 0; + } else if (rv == -1 && errno == EINVAL) { + // ospke=n + return 0; + } else { + fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno); + exit(EXIT_FAILURE); + } +} + /* * 0: vsyscall VMA doesn't exist vsyscall=none * 1: vsyscall VMA is --xp vsyscall=xonly @@ -60,7 +101,7 @@ static const char proc_pid_maps_vsyscall_2[] = static const char proc_pid_smaps_vsyscall_0[] = ""; static const char proc_pid_smaps_vsyscall_1[] = -"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" 
"Size: 4 kB\n" "KernelPageSize: 4 kB\n" "MMUPageSize: 4 kB\n" @@ -73,6 +114,7 @@ static const char proc_pid_smaps_vsyscall_1[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" @@ -83,14 +125,10 @@ static const char proc_pid_smaps_vsyscall_1[] = "SwapPss: 0 kB\n" "Locked: 0 kB\n" "THPeligible: 0\n" -/* - * "ProtectionKey:" field is conditional. It is possible to check it as well, - * but I don't have such machine. - */ ; static const char proc_pid_smaps_vsyscall_2[] = -"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n" +"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n" "Size: 4 kB\n" "KernelPageSize: 4 kB\n" "MMUPageSize: 4 kB\n" @@ -103,6 +141,7 @@ static const char proc_pid_smaps_vsyscall_2[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" @@ -113,10 +152,6 @@ static const char proc_pid_smaps_vsyscall_2[] = "SwapPss: 0 kB\n" "Locked: 0 kB\n" "THPeligible: 0\n" -/* - * "ProtectionKey:" field is conditional. It is possible to check it as well, - * but I'm too tired. 
- */ ; static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) @@ -238,19 +273,27 @@ static int test_proc_pid_smaps(pid_t pid) } perror("open /proc/${pid}/smaps"); return EXIT_FAILURE; + } + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(0 <= rv); + assert(rv <= sizeof(buf)); + + if (g_vsyscall == 0) { + assert(rv == 0); } else { - ssize_t rv = read(fd, buf, sizeof(buf)); - close(fd); - if (g_vsyscall == 0) { - assert(rv == 0); - } else { - size_t len = strlen(g_proc_pid_maps_vsyscall); - /* TODO "ProtectionKey:" */ - assert(rv > len); - assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0); + size_t len = strlen(g_proc_pid_smaps_vsyscall); + assert(rv > len); + assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0); + + if (g_protection_key_support) { +#define PROTECTION_KEY "ProtectionKey: 0\n" + assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY))); } - return EXIT_SUCCESS; } + + return EXIT_SUCCESS; } static const char g_smaps_rollup[] = @@ -303,6 +346,95 @@ static int test_proc_pid_smaps_rollup(pid_t pid) } } +static const char *parse_u64(const char *p, const char *const end, uint64_t *rv) +{ + *rv = 0; + for (; p != end; p += 1) { + if ('0' <= *p && *p <= '9') { + assert(!__builtin_mul_overflow(*rv, 10, rv)); + assert(!__builtin_add_overflow(*rv, *p - '0', rv)); + } else { + break; + } + } + assert(p != end); + return p; +} + +/* + * There seems to be 2 types of valid output: + * "0 A A B 0 0 0\n" for dynamic exeuctables, + * "0 0 0 B 0 0 0\n" for static executables. 
+ */ +static int test_proc_pid_statm(pid_t pid) +{ + char buf[4096]; + snprintf(buf, sizeof(buf), "/proc/%u/statm", pid); + int fd = open(buf, O_RDONLY); + if (fd == -1) { + perror("open /proc/${pid}/statm"); + return EXIT_FAILURE; + } + + ssize_t rv = read(fd, buf, sizeof(buf)); + close(fd); + + assert(rv >= 0); + assert(rv <= sizeof(buf)); + if (0) { + write(1, buf, rv); + } + + const char *p = buf; + const char *const end = p + rv; + + /* size */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + uint64_t resident; + p = parse_u64(p, end, &resident); + assert(p != end && *p++ == ' '); + + uint64_t shared; + p = parse_u64(p, end, &shared); + assert(p != end && *p++ == ' '); + + uint64_t text; + p = parse_u64(p, end, &text); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + /* data */ + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == ' '); + + assert(p != end && *p++ == '0'); + assert(p != end && *p++ == '\n'); + + assert(p == end); + + /* + * "text" is "mm->end_code - mm->start_code" at execve(2) time. + * munmap() doesn't change it. It can be anything (just link + * statically). It can't be 0 because executing to this point + * implies at least 1 page of code. + */ + assert(text > 0); + + /* + * These two are always equal. Always 0 for statically linked + * executables and sometimes 0 for dynamically linked executables. + * There is no way to tell one from another without parsing ELF + * which is too much for this test. 
+ */ + assert(resident == shared); + + return EXIT_SUCCESS; +} + int main(void) { int rv = EXIT_SUCCESS; @@ -328,6 +460,8 @@ int main(void) abort(); } + g_protection_key_support = protection_key_support(); + pid_t pid = fork(); if (pid == -1) { perror("fork"); @@ -389,11 +523,9 @@ int main(void) if (rv == EXIT_SUCCESS) { rv = test_proc_pid_smaps_rollup(pid); } - /* - * TODO test /proc/${pid}/statm, task_statm() - * ->start_code, ->end_code aren't updated by munmap(). - * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything. - */ + if (rv == EXIT_SUCCESS) { + rv = test_proc_pid_statm(pid); + } /* Cut the rope. */ int wstatus; diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index b8e2ea23cb3f..6e415ddb206f 100644..100755 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -331,3 +331,32 @@ specify_qemu_net () { echo $1 -net none fi } + +# Extract the ftrace output from the console log output +# The ftrace output in the original logs look like: +# Dumping ftrace buffer: +# --------------------------------- +# [...] +# --------------------------------- +extract_ftrace_from_console() { + awk < "$1" ' + + /Dumping ftrace buffer:/ { + buffer_count++ + print "Ftrace dump " buffer_count ":" + capture = 1 + next + } + + /---------------------------------/ { + if(capture == 1) { + capture = 2 + next + } else if(capture == 2) { + capture = 0 + print "" + } + } + + capture == 2' +} diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 5be670dd4009..de65d77b47ff 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -13,7 +13,7 @@ # # Authors: Paul E. 
McKenney <paulmck@linux.ibm.com> -T=/tmp/kvm-recheck.sh.$$ +T="`mktemp ${TMPDIR-/tmp}/kvm-recheck.sh.XXXXXX`" trap 'rm -f $T' 0 2 configerrors=0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index b0f36a638a69..7af73ddc148d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -49,6 +49,7 @@ TORTURE_SHUTDOWN_GRACE=180 TORTURE_SUITE=rcu TORTURE_MOD=rcutorture TORTURE_TRUST_MAKE="" +debuginfo="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y" resdir="" configs="" cpus=0 @@ -68,6 +69,7 @@ usage () { echo " --cpus N" echo " --datestamp string" echo " --defconfig string" + echo " --debug-info" echo " --dryrun batches|scenarios|sched|script" echo " --duration minutes | <seconds>s | <hours>h | <days>d" echo " --gdb" @@ -135,6 +137,15 @@ do ds=$2 shift ;; + --debug-info|--debuginfo) + if test -z "$TORTURE_KCONFIG_KCSAN_ARG" && test -z "$TORTURE_BOOT_GDB_ARG" + then + TORTURE_KCONFIG_KCSAN_ARG="$debuginfo"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG + else + echo "Ignored redundant --debug-info (implied by --kcsan &c)" + fi + ;; --defconfig) checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--' TORTURE_DEFCONFIG=$2 @@ -163,7 +174,7 @@ do shift ;; --gdb) - TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_KCONFIG_GDB_ARG="$debuginfo"; export TORTURE_KCONFIG_GDB_ARG TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG ;; @@ -179,7 +190,7 @@ do shift ;; --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + TORTURE_KCONFIG_KASAN_ARG="$debuginfo CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG if test -n 
"$torture_qemu_mem_default" then TORTURE_QEMU_MEM=2G @@ -191,7 +202,7 @@ do shift ;; --kcsan) - TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_KCONFIG_KCSAN_ARG="$debuginfo CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 9ab0f6bc172c..b07c11cf6929 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -11,7 +11,7 @@ # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> -T=${TMPDIR-/tmp}/parse-console.sh.$$ +T="`mktemp -d ${TMPDIR-/tmp}/parse-console.sh.XXXXXX`" file="$1" title="$2" @@ -182,3 +182,10 @@ if ! test -s $file.diags then rm -f $file.diags fi + +# Call extract_ftrace_from_console function, if the output is empty, +# don't create $file.ftrace. Otherwise output the results to $file.ftrace +extract_ftrace_from_console $file > $file.ftrace +if [ ! 
-s $file.ftrace ]; then + rm -f $file.ftrace +fi diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 12b50a4a881a..d5a0d8a33c27 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -472,7 +472,7 @@ do if test -n "$firsttime" then torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : if test -f "$T/last-resdir-kasan" then diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 index 093ea6e8e65c..9003c56cd764 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 @@ -11,3 +11,4 @@ CONFIG_FORCE_TASKS_TRACE_RCU=y #CHECK#CONFIG_TASKS_TRACE_RCU=y CONFIG_TASKS_TRACE_RCU_READ_MB=n CONFIG_RCU_EXPERT=y +CONFIG_DEBUG_OBJECTS=y diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 5073dbc96125..2deac2031de9 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -1,6 +1,6 @@ # 
SPDX-License-Identifier: GPL-2.0 -CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 +CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE CFLAGS += $(KHDR_INCLUDES) TEST_GEN_PROGS := resctrl_tests diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index d3cbb829ff6a..bcbca356d56a 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -205,10 +205,11 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) * cache_val: execute benchmark and measure LLC occupancy resctrl * and perf cache miss for the benchmark * @param: parameters passed to cache_val() + * @span: buffer size for the benchmark * * Return: 0 on success. non-zero on failure. */ -int cat_val(struct resctrl_val_param *param) +int cat_val(struct resctrl_val_param *param, size_t span) { int memflush = 1, operation = 0, ret = 0; char *resctrl_val = param->resctrl_val; @@ -245,7 +246,7 @@ int cat_val(struct resctrl_val_param *param) if (ret) break; - if (run_fill_buf(param->span, memflush, operation, true)) { + if (run_fill_buf(span, memflush, operation, true)) { fprintf(stderr, "Error-running fill buffer\n"); ret = -1; goto pe_close; @@ -294,7 +295,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits, ret = platform && abs((int)diff_percent) > max_diff_percent && (cmt ? (abs(avg_diff) > max_diff) : true); - ksft_print_msg("%s Check cache miss rate within %d%%\n", + ksft_print_msg("%s Check cache miss rate within %lu%%\n", ret ? 
"Fail:" : "Pass:", max_diff_percent); ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent)); diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 3848dfb46aba..224ba8544d8a 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -41,7 +41,7 @@ static int cat_setup(struct resctrl_val_param *p) return ret; } -static int check_results(struct resctrl_val_param *param) +static int check_results(struct resctrl_val_param *param, size_t span) { char *token_array[8], temp[512]; unsigned long sum_llc_perf_miss = 0; @@ -76,7 +76,7 @@ static int check_results(struct resctrl_val_param *param) fclose(fp); no_of_bits = count_bits(param->mask); - return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, + return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64, MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, get_vendor() == ARCH_INTEL, false); } @@ -96,6 +96,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) char cbm_mask[256]; int count_of_bits; char pipe_message; + size_t span; /* Get default cbm mask for L3/L2 cache */ ret = get_cbm_mask(cache_type, cbm_mask); @@ -140,7 +141,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) /* Set param values for parent thread which will be allocated bitmask * with (max_bits - n) bits */ - param.span = cache_size * (count_of_bits - n) / count_of_bits; + span = cache_size * (count_of_bits - n) / count_of_bits; strcpy(param.ctrlgrp, "c2"); strcpy(param.mongrp, "m2"); strcpy(param.filename, RESULT_FILE_NAME2); @@ -162,23 +163,17 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) param.mask = l_mask_1; strcpy(param.ctrlgrp, "c1"); strcpy(param.mongrp, "m1"); - param.span = cache_size * n / count_of_bits; + span = cache_size * n / count_of_bits; strcpy(param.filename, RESULT_FILE_NAME1); param.num_of_runs = 0; param.cpu_no = sibling_cpu_no; - } else { - ret = signal_handler_register(); - 
if (ret) { - kill(bm_pid, SIGKILL); - goto out; - } } remove(param.filename); - ret = cat_val(&param); + ret = cat_val(&param, span); if (ret == 0) - ret = check_results(&param); + ret = check_results(&param, span); if (bm_pid == 0) { /* Tell parent that child is ready */ @@ -208,10 +203,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) } close(pipefd[0]); kill(bm_pid, SIGKILL); - signal_handler_unregister(); } -out: cat_test_cleanup(); return ret; diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index cb2197647c6c..50bdbce9fba9 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -27,7 +27,7 @@ static int cmt_setup(struct resctrl_val_param *p) return 0; } -static int check_results(struct resctrl_val_param *param, int no_of_bits) +static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits) { char *token_array[8], temp[512]; unsigned long sum_llc_occu_resc = 0; @@ -58,7 +58,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) } fclose(fp); - return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span, + return show_cache_info(sum_llc_occu_resc, no_of_bits, span, MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true, true); } @@ -68,16 +68,17 @@ void cmt_test_cleanup(void) remove(RESULT_FILE_NAME); } -int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) +int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd) { + const char * const *cmd = benchmark_cmd; + const char *new_cmd[BENCHMARK_ARGS]; unsigned long cache_size = 0; unsigned long long_mask; + char *span_str = NULL; char cbm_mask[256]; int count_of_bits; - int ret; - - if (!validate_resctrl_feature_request(CMT_STR)) - return -1; + size_t span; + int ret, i; ret = get_cbm_mask("L3", cbm_mask); if (ret) @@ -105,24 +106,36 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, .mask =
~(long_mask << n) & long_mask, - .span = cache_size * n / count_of_bits, .num_of_runs = 0, .setup = cmt_setup, }; - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) - sprintf(benchmark_cmd[1], "%zu", param.span); + span = cache_size * n / count_of_bits; + + if (strcmp(cmd[0], "fill_buf") == 0) { + /* Duplicate the command to be able to replace span in it */ + for (i = 0; benchmark_cmd[i]; i++) + new_cmd[i] = benchmark_cmd[i]; + new_cmd[i] = NULL; + + ret = asprintf(&span_str, "%zu", span); + if (ret < 0) + return -1; + new_cmd[1] = span_str; + cmd = new_cmd; + } remove(RESULT_FILE_NAME); - ret = resctrl_val(benchmark_cmd, &param); + ret = resctrl_val(cmd, &param); if (ret) goto out; - ret = check_results(&param, n); + ret = check_results(&param, span, n); out: cmt_test_cleanup(); + free(span_str); return ret; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 4d2f145804b8..d3bf4368341e 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -12,7 +12,7 @@ #define RESULT_FILE_NAME "result_mba" #define NUM_OF_RUNS 5 -#define MAX_DIFF_PERCENT 5 +#define MAX_DIFF_PERCENT 8 #define ALLOCATION_MAX 100 #define ALLOCATION_MIN 10 #define ALLOCATION_STEP 10 @@ -141,7 +141,7 @@ void mba_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) +int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBA_STR, @@ -149,7 +149,7 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) .mongrp = "m1", .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, - .bw_report = bw_report, + .bw_report = "reads", .setup = mba_setup }; int ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index c7de6f5977f6..741533f2b075 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++
b/tools/testing/selftests/resctrl/mbm_test.c @@ -11,7 +11,7 @@ #include "resctrl.h" #define RESULT_FILE_NAME "result_mbm" -#define MAX_DIFF_PERCENT 5 +#define MAX_DIFF_PERCENT 8 #define NUM_OF_RUNS 5 static int @@ -95,7 +95,7 @@ static int mbm_setup(struct resctrl_val_param *p) return END_OF_TESTS; /* Set up shemata with 100% allocation on the first run. */ - if (p->num_of_runs == 0) + if (p->num_of_runs == 0 && validate_resctrl_feature_request("MB", NULL)) ret = write_schemata(p->ctrlgrp, "100", p->cpu_no, p->resctrl_val); @@ -109,16 +109,15 @@ void mbm_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd) +int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBM_STR, .ctrlgrp = "c1", .mongrp = "m1", - .span = span, .cpu_no = cpu_no, .filename = RESULT_FILE_NAME, - .bw_report = bw_report, + .bw_report = "reads", .setup = mbm_setup }; int ret; @@ -129,7 +128,7 @@ int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd if (ret) goto out; - ret = check_results(span); + ret = check_results(DEFAULT_SPAN); out: mbm_test_cleanup(); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 838d1a438f33..a33f414f6019 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#define _GNU_SOURCE #ifndef RESCTRL_H #define RESCTRL_H #include <stdio.h> @@ -28,16 +27,16 @@ #define RESCTRL_PATH "/sys/fs/resctrl" #define PHYS_ID_PATH "/sys/devices/system/cpu/cpu" #define INFO_PATH "/sys/fs/resctrl/info" -#define L3_PATH "/sys/fs/resctrl/info/L3" -#define MB_PATH "/sys/fs/resctrl/info/MB" -#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON" -#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features" #define ARCH_INTEL 1 #define ARCH_AMD 2 #define END_OF_TESTS 1 
+#define BENCHMARK_ARGS 64 + +#define DEFAULT_SPAN (250 * MB) + #define PARENT_EXIT(err_msg) \ do { \ perror(err_msg); \ @@ -52,7 +51,6 @@ * @ctrlgrp: Name of the control monitor group (con_mon grp) * @mongrp: Name of the monitor group (mon grp) * @cpu_no: CPU number to which the benchmark would be binded - * @span: Memory bytes accessed in each benchmark iteration * @filename: Name of file to which the o/p should be written * @bw_report: Bandwidth report type (reads vs writes) * @setup: Call back function to setup test environment @@ -62,7 +60,6 @@ struct resctrl_val_param { char ctrlgrp[64]; char mongrp[64]; int cpu_no; - size_t span; char filename[64]; char *bw_report; unsigned long mask; @@ -86,10 +83,9 @@ int get_resource_id(int cpu_no, int *resource_id); int mount_resctrlfs(void); int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); -bool validate_resctrl_feature_request(const char *resctrl_val); +bool validate_resctrl_feature_request(const char *resource, const char *feature); char *fgrep(FILE *inf, const char *str); int taskset_benchmark(pid_t bm_pid, int cpu_no); -void run_benchmark(int signum, siginfo_t *info, void *ucontext); int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val); int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, @@ -97,21 +93,21 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); int run_fill_buf(size_t span, int memflush, int op, bool once); -int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); -int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd); +int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param); +int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); -int mba_schemata_change(int cpu_no, 
char *bw_report, char **benchmark_cmd); +int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd); void mba_test_cleanup(void); int get_cbm_mask(char *cache_type, char *cbm_mask); int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); int signal_handler_register(void); void signal_handler_unregister(void); -int cat_val(struct resctrl_val_param *param); +int cat_val(struct resctrl_val_param *param, size_t span); void cat_test_cleanup(void); int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type); -int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd); +int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd); unsigned int count_bits(unsigned long n); void cmt_test_cleanup(void); int get_core_sibling(int cpu_no); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index d511daeb6851..2bbe3045a018 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -10,9 +10,6 @@ */ #include "resctrl.h" -#define BENCHMARK_ARGS 64 -#define BENCHMARK_ARG_SIZE 64 - static int detect_vendor(void) { FILE *inf = fopen("/proc/cpuinfo", "r"); @@ -52,8 +49,8 @@ int get_vendor(void) static void cmd_help(void) { - printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n"); - printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n"); + printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n"); + printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n"); printf("\t default benchmark is builtin fill_buf\n"); printf("\t-t test list: run tests specified in the test list, "); printf("e.g. 
-t mbm,mba,cmt,cat\n"); @@ -70,72 +67,98 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(char **benchmark_cmd, size_t span, - int cpu_no, char *bw_report) +static int test_prepare(void) { int res; - ksft_print_msg("Starting MBM BW change ...\n"); + res = signal_handler_register(); + if (res) { + ksft_print_msg("Failed to register signal handler\n"); + return res; + } res = mount_resctrlfs(); if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + signal_handler_unregister(); + ksft_print_msg("Failed to mount resctrl FS\n"); + return res; + } + return 0; +} + +static void test_cleanup(void) +{ + umount_resctrlfs(); + signal_handler_unregister(); +} + +static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no) +{ + int res; + + ksft_print_msg("Starting MBM BW change ...\n"); + + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } - if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) { + if (!validate_resctrl_feature_request("L3_MON", "mbm_total_bytes") || + !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || + (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); - goto umount; + goto cleanup; } - res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); + res = mbm_bw_change(cpu_no, benchmark_cmd); ksft_test_result(!res, "MBM: bw change\n"); if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. 
Check BIOS configuration.\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } -static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) +static void run_mba_test(const char * const *benchmark_cmd, int cpu_no) { int res; ksft_print_msg("Starting MBA Schemata change ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } - if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) { + if (!validate_resctrl_feature_request("MB", NULL) || + !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") || + (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); - goto umount; + goto cleanup; } - res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); + res = mba_schemata_change(cpu_no, benchmark_cmd); ksft_test_result(!res, "MBA: schemata change\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } -static void run_cmt_test(char **benchmark_cmd, int cpu_no) +static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no) { int res; ksft_print_msg("Starting CMT test ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } - if (!validate_resctrl_feature_request(CMT_STR)) { + if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") || + !validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); - goto umount; + goto cleanup; } res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); @@ -143,8 +166,8 @@ static void run_cmt_test(char **benchmark_cmd, int cpu_no) if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is 
enabled. Check BIOS configuration.\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } static void run_cat_test(int cpu_no, int no_of_bits) @@ -153,48 +176,53 @@ static void run_cat_test(int cpu_no, int no_of_bits) ksft_print_msg("Starting CAT test ...\n"); - res = mount_resctrlfs(); - if (res) { - ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + if (test_prepare()) { + ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } - if (!validate_resctrl_feature_request(CAT_STR)) { + if (!validate_resctrl_feature_request("L3", NULL)) { ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); - goto umount; + goto cleanup; } res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); ksft_test_result(!res, "CAT: test\n"); -umount: - umount_resctrlfs(); +cleanup: + test_cleanup(); } int main(int argc, char **argv) { - bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; - char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; - char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; - int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0; - int ben_ind, ben_count, tests = 0; - size_t span = 250 * MB; + bool mbm_test = true, mba_test = true, cmt_test = true; + const char *benchmark_cmd[BENCHMARK_ARGS] = {}; + int c, cpu_no = 1, i, no_of_bits = 0; + char *span_str = NULL; bool cat_test = true; + int tests = 0; + int ret; - for (i = 0; i < argc; i++) { - if (strcmp(argv[i], "-b") == 0) { - ben_ind = i + 1; - ben_count = argc - ben_ind; - argc_new = ben_ind - 1; - has_ben = true; - break; - } - } - - while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) { + while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) { char *token; switch (c) { + case 'b': + /* + * First move optind back to the (first) optarg and + * then build the benchmark command using the + * remaining arguments. 
+ */ + optind--; + if (argc - optind >= BENCHMARK_ARGS) + ksft_exit_fail_msg("Too long benchmark command"); + + /* Extract benchmark command from command line. */ + for (i = 0; i < argc - optind; i++) + benchmark_cmd[i] = argv[i + optind]; + benchmark_cmd[i] = NULL; + + goto last_arg; case 't': token = strtok(optarg, ","); @@ -243,6 +271,7 @@ int main(int argc, char **argv) return -1; } } +last_arg: ksft_print_header(); @@ -254,29 +283,6 @@ int main(int argc, char **argv) if (geteuid() != 0) return ksft_exit_skip("Not running as root. Skipping...\n"); - if (has_ben) { - /* Extract benchmark command from command line. */ - for (i = ben_ind; i < argc; i++) { - benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i]; - sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]); - } - benchmark_cmd[ben_count] = NULL; - } else { - /* If no benchmark is given by "-b" argument, use fill_buf. */ - for (i = 0; i < 5; i++) - benchmark_cmd[i] = benchmark_cmd_area[i]; - - strcpy(benchmark_cmd[0], "fill_buf"); - sprintf(benchmark_cmd[1], "%zu", span); - strcpy(benchmark_cmd[2], "1"); - strcpy(benchmark_cmd[3], "0"); - strcpy(benchmark_cmd[4], "false"); - benchmark_cmd[5] = NULL; - } - - sprintf(bw_report, "reads"); - sprintf(bm_type, "fill_buf"); - if (!check_resctrlfs_support()) return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); @@ -285,13 +291,26 @@ int main(int argc, char **argv) filter_dmesg(); + if (!benchmark_cmd[0]) { + /* If no benchmark is given by "-b" argument, use fill_buf. */ + benchmark_cmd[0] = "fill_buf"; + ret = asprintf(&span_str, "%u", DEFAULT_SPAN); + if (ret < 0) + ksft_exit_fail_msg("Out of memory!\n"); + benchmark_cmd[1] = span_str; + benchmark_cmd[2] = "1"; + benchmark_cmd[3] = "0"; + benchmark_cmd[4] = "false"; + benchmark_cmd[5] = NULL; + } + ksft_set_plan(tests ? 
: 4); if (mbm_test) - run_mbm_test(benchmark_cmd, span, cpu_no, bw_report); + run_mbm_test(benchmark_cmd, cpu_no); if (mba_test) - run_mba_test(benchmark_cmd, cpu_no, bw_report); + run_mba_test(benchmark_cmd, cpu_no); if (cmt_test) run_cmt_test(benchmark_cmd, cpu_no); @@ -299,5 +318,6 @@ int main(int argc, char **argv) if (cat_test) run_cat_test(cpu_no, no_of_bits); + free(span_str); ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index f0f6c5f6e98b..88789678917b 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -468,7 +468,9 @@ pid_t bm_pid, ppid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { - kill(bm_pid, SIGKILL); + /* Only kill child after bm_pid is set after fork() */ + if (bm_pid) + kill(bm_pid, SIGKILL); umount_resctrlfs(); tests_cleanup(); ksft_print_msg("Ending\n\n"); @@ -482,9 +484,11 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) */ int signal_handler_register(void) { - struct sigaction sigact; + struct sigaction sigact = {}; int ret = 0; + bm_pid = 0; + sigact.sa_sigaction = ctrlc_handler; sigemptyset(&sigact.sa_mask); sigact.sa_flags = SA_SIGINFO; @@ -504,7 +508,7 @@ int signal_handler_register(void) */ void signal_handler_unregister(void) { - struct sigaction sigact; + struct sigaction sigact = {}; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); @@ -622,6 +626,56 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) } /* + * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) + * in specified signal. Direct benchmark stdio to /dev/null. 
+ * @signum: signal number + * @info: signal info + * @ucontext: user context in signal handling + */ +static void run_benchmark(int signum, siginfo_t *info, void *ucontext) +{ + int operation, ret, memflush; + char **benchmark_cmd; + size_t span; + bool once; + FILE *fp; + + benchmark_cmd = info->si_ptr; + + /* + * Direct stdio of child to /dev/null, so that only parent writes to + * stdio (console) + */ + fp = freopen("/dev/null", "w", stdout); + if (!fp) + PARENT_EXIT("Unable to direct benchmark status to /dev/null"); + + if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { + /* Execute default fill_buf benchmark */ + span = strtoul(benchmark_cmd[1], NULL, 10); + memflush = atoi(benchmark_cmd[2]); + operation = atoi(benchmark_cmd[3]); + if (!strcmp(benchmark_cmd[4], "true")) + once = true; + else if (!strcmp(benchmark_cmd[4], "false")) + once = false; + else + PARENT_EXIT("Invalid once parameter"); + + if (run_fill_buf(span, memflush, operation, once)) + fprintf(stderr, "Error in running fill buffer\n"); + } else { + /* Execute specified benchmark */ + ret = execvp(benchmark_cmd[0], benchmark_cmd); + if (ret) + perror("wrong\n"); + } + + fclose(stdout); + PARENT_EXIT("Unable to run specified benchmark"); +} + +/* * resctrl_val: execute benchmark and measure memory bandwidth on * the benchmark * @benchmark_cmd: benchmark command and its arguments @@ -629,7 +683,7 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start) * * Return: 0 on success. non-zero on failure. 
*/ -int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) +int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param) { char *resctrl_val = param->resctrl_val; unsigned long bw_resc_start = 0; @@ -706,28 +760,30 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) ksft_print_msg("Benchmark PID: %d\n", bm_pid); - ret = signal_handler_register(); - if (ret) - goto out; - - value.sival_ptr = benchmark_cmd; + /* + * The cast removes constness but nothing mutates benchmark_cmd within + * the context of this process. At the receiving process, it becomes + * argv, which is mutable, on exec() but that's after fork() so it + * doesn't matter for the process running the tests. + */ + value.sival_ptr = (void *)benchmark_cmd; /* Taskset benchmark to specified cpu */ ret = taskset_benchmark(bm_pid, param->cpu_no); if (ret) - goto unregister; + goto out; /* Write benchmark to specified control&monitoring grp in resctrl FS */ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp, resctrl_val); if (ret) - goto unregister; + goto out; if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { ret = initialize_mem_bw_imc(); if (ret) - goto unregister; + goto out; initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp, param->cpu_no, resctrl_val); @@ -742,7 +798,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) sizeof(pipe_message)) { perror("# failed reading message from child process"); close(pipefd[0]); - goto unregister; + goto out; } } close(pipefd[0]); @@ -751,7 +807,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) if (sigqueue(bm_pid, SIGUSR1, value) == -1) { perror("# sigqueue SIGUSR1 to child"); ret = errno; - goto unregister; + goto out; } /* Give benchmark enough time to fully run */ @@ -780,8 +836,6 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) } } -unregister: - 
signal_handler_unregister(); out: kill(bm_pid, SIGKILL); diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index bd36ee206602..5ebd43683876 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -8,6 +8,9 @@ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, * Fenghua Yu <fenghua.yu@intel.com> */ +#include <fcntl.h> +#include <limits.h> + #include "resctrl.h" static int find_resctrl_mount(char *buffer) @@ -292,58 +295,6 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no) } /* - * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) - * in specified signal. Direct benchmark stdio to /dev/null. - * @signum: signal number - * @info: signal info - * @ucontext: user context in signal handling - * - * Return: void - */ -void run_benchmark(int signum, siginfo_t *info, void *ucontext) -{ - int operation, ret, memflush; - char **benchmark_cmd; - size_t span; - bool once; - FILE *fp; - - benchmark_cmd = info->si_ptr; - - /* - * Direct stdio of child to /dev/null, so that only parent writes to - * stdio (console) - */ - fp = freopen("/dev/null", "w", stdout); - if (!fp) - PARENT_EXIT("Unable to direct benchmark status to /dev/null"); - - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { - /* Execute default fill_buf benchmark */ - span = strtoul(benchmark_cmd[1], NULL, 10); - memflush = atoi(benchmark_cmd[2]); - operation = atoi(benchmark_cmd[3]); - if (!strcmp(benchmark_cmd[4], "true")) - once = true; - else if (!strcmp(benchmark_cmd[4], "false")) - once = false; - else - PARENT_EXIT("Invalid once parameter"); - - if (run_fill_buf(span, memflush, operation, once)) - fprintf(stderr, "Error in running fill buffer\n"); - } else { - /* Execute specified benchmark */ - ret = execvp(benchmark_cmd[0], benchmark_cmd); - if (ret) - perror("wrong\n"); - } - - fclose(stdout); - PARENT_EXIT("Unable to run specified benchmark"); -} - -/* * create_grp - 
Create a group only if one doesn't exist * @grp_name: Name of the group * @grp: Full path and name of the group @@ -488,9 +439,8 @@ out: */ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) { - char controlgroup[1024], schema[1024], reason[64]; - int resource_id, ret = 0; - FILE *fp; + char controlgroup[1024], reason[128], schema[1024] = {}; + int resource_id, fd, schema_len = -1, ret = 0; if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) && strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) && @@ -518,28 +468,39 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val) if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) || !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata); + schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n", + "L3:", resource_id, '=', schemata); if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) || !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) - sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata); - - fp = fopen(controlgroup, "w"); - if (!fp) { - sprintf(reason, "Failed to open control group"); + schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n", + "MB:", resource_id, '=', schemata); + if (schema_len < 0 || schema_len >= sizeof(schema)) { + snprintf(reason, sizeof(reason), + "snprintf() failed with return value : %d", schema_len); ret = -1; - goto out; } - if (fprintf(fp, "%s\n", schema) < 0) { - sprintf(reason, "Failed to write schemata in control group"); - fclose(fp); + fd = open(controlgroup, O_WRONLY); + if (fd < 0) { + snprintf(reason, sizeof(reason), + "open() failed : %s", strerror(errno)); ret = -1; - goto out; + goto err_schema_not_empty; } - fclose(fp); + if (write(fd, schema, schema_len) < 0) { + snprintf(reason, sizeof(reason), + "write() failed : %s", strerror(errno)); + close(fd); + ret = -1; + + goto err_schema_not_empty; + } + close(fd); +err_schema_not_empty: + schema[schema_len - 
1] = 0; out: ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n", schema, ret ? " # " : "", @@ -604,63 +565,46 @@ char *fgrep(FILE *inf, const char *str) /* * validate_resctrl_feature_request - Check if requested feature is valid. - * @resctrl_val: Requested feature + * @resource: Required resource (e.g., MB, L3, L2, L3_MON, etc.) + * @feature: Required monitor feature (in mon_features file). Can only be + * set for L3_MON. Must be NULL for all other resources. * - * Return: True if the feature is supported, else false. False is also - * returned if resctrl FS is not mounted. + * Return: True if the resource/feature is supported, else false. False is + * also returned if resctrl FS is not mounted. */ -bool validate_resctrl_feature_request(const char *resctrl_val) +bool validate_resctrl_feature_request(const char *resource, const char *feature) { + char res_path[PATH_MAX]; struct stat statbuf; - bool found = false; char *res; FILE *inf; int ret; - if (!resctrl_val) + if (!resource) return false; ret = find_resctrl_mount(NULL); if (ret) return false; - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { - if (!stat(L3_PATH, &statbuf)) - return true; - } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) { - if (!stat(MB_PATH, &statbuf)) - return true; - } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) || - !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - if (!stat(L3_MON_PATH, &statbuf)) { - inf = fopen(L3_MON_FEATURES_PATH, "r"); - if (!inf) - return false; - - if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) { - res = fgrep(inf, "llc_occupancy"); - if (res) { - found = true; - free(res); - } - } - - if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) { - res = fgrep(inf, "mbm_total_bytes"); - if (res) { - free(res); - res = fgrep(inf, "mbm_local_bytes"); - if (res) { - found = true; - free(res); - } - } - } - fclose(inf); - } - } + snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource); + + if (stat(res_path, &statbuf)) 
+ return false; + + if (!feature) + return true; + + snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource); + inf = fopen(res_path, "r"); + if (!inf) + return false; + + res = fgrep(inf, feature); + free(res); + fclose(inf); - return found; + return !!res; } int filter_dmesg(void) diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile index ebdbb3c22e54..f224b84591fb 100644 --- a/tools/testing/selftests/riscv/hwprobe/Makefile +++ b/tools/testing/selftests/riscv/hwprobe/Makefile @@ -2,9 +2,14 @@ # Copyright (C) 2021 ARM Limited # Originally tools/testing/arm64/abi/Makefile -TEST_GEN_PROGS := hwprobe +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_GEN_PROGS := hwprobe cbo include ../../lib.mk $(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S - $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/cbo: cbo.c sys_hwprobe.S + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c new file mode 100644 index 000000000000..50a2cc8aef38 --- /dev/null +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + * + * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a + * subset of cpus, as well as only executing the tests on those cpus. + */ +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdint.h> +#include <string.h> +#include <sched.h> +#include <signal.h> +#include <assert.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <asm/ucontext.h> + +#include "hwprobe.h" +#include "../../kselftest.h" + +#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) + +static char mem[4096] __aligned(4096) = { [0 ... 
4095] = 0xa5 }; + +static bool illegal_insn; + +static void sigill_handler(int sig, siginfo_t *info, void *context) +{ + unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext; + uint32_t insn = *(uint32_t *)regs[0]; + + assert(insn == MK_CBO(regs[11])); + + illegal_insn = true; + regs[0] += 4; +} + +static void cbo_insn(char *base, int fn) +{ + uint32_t insn = MK_CBO(fn); + + asm volatile( + "mv a0, %0\n" + "li a1, %1\n" + ".4byte %2\n" + : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory"); +} + +static void cbo_inval(char *base) { cbo_insn(base, 0); } +static void cbo_clean(char *base) { cbo_insn(base, 1); } +static void cbo_flush(char *base) { cbo_insn(base, 2); } +static void cbo_zero(char *base) { cbo_insn(base, 4); } + +static void test_no_zicbom(void *arg) +{ + ksft_print_msg("Testing Zicbom instructions remain privileged\n"); + + illegal_insn = false; + cbo_clean(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.clean\n"); + + illegal_insn = false; + cbo_flush(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.flush\n"); + + illegal_insn = false; + cbo_inval(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.inval\n"); +} + +static void test_no_zicboz(void *arg) +{ + ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n"); + + illegal_insn = false; + cbo_zero(&mem[0]); + ksft_test_result(illegal_insn, "No cbo.zero\n"); +} + +static bool is_power_of_2(__u64 n) +{ + return n != 0 && (n & (n - 1)) == 0; +} + +static void test_zicboz(void *arg) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE, + }; + cpu_set_t *cpus = (cpu_set_t *)arg; + __u64 block_size; + int i, j; + long rc; + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0); + block_size = pair.value; + ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE && + is_power_of_2(block_size), "Zicboz block size\n"); + ksft_print_msg("Zicboz block size: %ld\n", block_size); + + illegal_insn = 
false; + cbo_zero(&mem[block_size]); + ksft_test_result(!illegal_insn, "cbo.zero\n"); + + if (illegal_insn || !is_power_of_2(block_size)) { + ksft_test_result_skip("cbo.zero check\n"); + return; + } + + assert(block_size <= 1024); + + for (i = 0; i < 4096 / block_size; ++i) { + if (i % 2) + cbo_zero(&mem[i * block_size]); + } + + for (i = 0; i < 4096 / block_size; ++i) { + char expected = i % 2 ? 0x0 : 0xa5; + + for (j = 0; j < block_size; ++j) { + if (mem[i * block_size + j] != expected) { + ksft_test_result_fail("cbo.zero check\n"); + ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n", + i * block_size + j, expected); + return; + } + } + } + + ksft_test_result_pass("cbo.zero check\n"); +} + +static void check_no_zicboz_cpus(cpu_set_t *cpus) +{ + struct riscv_hwprobe pair = { + .key = RISCV_HWPROBE_KEY_IMA_EXT_0, + }; + cpu_set_t one_cpu; + int i = 0, c = 0; + long rc; + + while (i++ < CPU_COUNT(cpus)) { + while (!CPU_ISSET(c, cpus)) + ++c; + + CPU_ZERO(&one_cpu); + CPU_SET(c, &one_cpu); + + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0); + assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); + + if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) + ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n" + "Use taskset to select a set of harts where Zicboz\n" + "presence (present or not) is consistent for each hart\n"); + ++c; + } +} + +enum { + TEST_ZICBOZ, + TEST_NO_ZICBOZ, + TEST_NO_ZICBOM, +}; + +static struct test_info { + bool enabled; + unsigned int nr_tests; + void (*test_fn)(void *arg); +} tests[] = { + [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz }, + [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz }, + [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom }, +}; + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_sigaction = &sigill_handler, + .sa_flags = SA_SIGINFO, + }; + struct riscv_hwprobe pair; + unsigned int plan = 0; + cpu_set_t cpus; + long rc; + int i; + + if (argc > 1 && 
!strcmp(argv[1], "--sigill")) { + rc = sigaction(SIGILL, &act, NULL); + assert(rc == 0); + tests[TEST_NO_ZICBOZ].enabled = true; + tests[TEST_NO_ZICBOM].enabled = true; + } + + rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus); + assert(rc == 0); + + ksft_print_header(); + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0); + if (rc < 0) + ksft_exit_fail_msg("hwprobe() failed with %d\n", rc); + assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0); + + if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) { + tests[TEST_ZICBOZ].enabled = true; + tests[TEST_NO_ZICBOZ].enabled = false; + } else { + check_no_zicboz_cpus(&cpus); + } + + for (i = 0; i < ARRAY_SIZE(tests); ++i) + plan += tests[i].enabled ? tests[i].nr_tests : 0; + + if (plan == 0) + ksft_print_msg("No tests enabled.\n"); + else + ksft_set_plan(plan); + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (tests[i].enabled) + tests[i].test_fn(&cpus); + } + + ksft_finished(); +} diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c index 09f290a67420..c474891df307 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c @@ -1,14 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <stddef.h> -#include <asm/hwprobe.h> - -/* - * Rather than relying on having a new enough libc to define this, just do it - * ourselves. This way we don't need to be coupled to a new-enough libc to - * contain the call. - */ -long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, unsigned int flags); +#include "hwprobe.h" +#include "../../kselftest.h" int main(int argc, char **argv) { @@ -16,6 +8,9 @@ int main(int argc, char **argv) unsigned long cpus; long out; + ksft_print_header(); + ksft_set_plan(5); + /* Fake the CPU_SET ops. 
*/ cpus = -1; @@ -25,13 +20,16 @@ int main(int argc, char **argv) */ for (long i = 0; i < 8; i++) pairs[i].key = i; + out = riscv_hwprobe(pairs, 8, 1, &cpus, 0); if (out != 0) - return -1; + ksft_exit_fail_msg("hwprobe() failed with %ld\n", out); + for (long i = 0; i < 4; ++i) { /* Fail if the kernel claims not to recognize a base key. */ if ((i < 4) && (pairs[i].key != i)) - return -2; + ksft_exit_fail_msg("Failed to recognize base key: key != i, " + "key=%ld, i=%ld\n", pairs[i].key, i); if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR) continue; @@ -39,52 +37,30 @@ int main(int argc, char **argv) if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA) continue; - return -3; + ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value); } - /* - * This should also work with a NULL CPU set, but should not work - * with an improperly supplied CPU set. - */ out = riscv_hwprobe(pairs, 8, 0, 0, 0); - if (out != 0) - return -4; + ksft_test_result(out == 0, "NULL CPU set\n"); out = riscv_hwprobe(pairs, 8, 0, &cpus, 0); - if (out == 0) - return -5; + ksft_test_result(out != 0, "Bad CPU set\n"); out = riscv_hwprobe(pairs, 8, 1, 0, 0); - if (out == 0) - return -6; + ksft_test_result(out != 0, "NULL CPU set with non-zero count\n"); - /* - * Check that keys work by providing one that we know exists, and - * checking to make sure the resultig pair is what we asked for. - */ pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR; out = riscv_hwprobe(pairs, 1, 1, &cpus, 0); - if (out != 0) - return -7; - if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR) - return -8; + ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR, + "Existing key is maintained\n"); - /* - * Check that an unknown key gets overwritten with -1, - * but doesn't block elements after it. 
- */ pairs[0].key = 0x5555; pairs[1].key = 1; pairs[1].value = 0xAAAA; out = riscv_hwprobe(pairs, 2, 0, 0, 0); - if (out != 0) - return -9; - - if (pairs[0].key != -1) - return -10; - - if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA)) - return -11; + ksft_test_result(out == 0 && pairs[0].key == -1 && + pairs[1].key == 1 && pairs[1].value != 0xAAAA, + "Unknown key overwritten with -1 and doesn't block other elements\n"); - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h new file mode 100644 index 000000000000..721b0ce73a56 --- /dev/null +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_RISCV_HWPROBE_H +#define SELFTEST_RISCV_HWPROBE_H +#include <stddef.h> +#include <asm/hwprobe.h> + +/* + * Rather than relying on having a new enough libc to define this, just do it + * ourselves. This way we don't need to be coupled to a new-enough libc to + * contain the call. + */ +long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, unsigned int flags); + +#endif diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile index 11e0f0568923..c333263f2b27 100644 --- a/tools/testing/selftests/riscv/mm/Makefile +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -5,11 +5,11 @@ # Additional include paths needed by kselftest.h and local headers CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. 
-TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup +TEST_GEN_FILES := mmap_default mmap_bottomup -TEST_PROGS := testcases/run_mmap.sh +TEST_PROGS := run_mmap.sh include ../../lib.mk -$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h +$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c index b29379f7e478..1757d19ca89b 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <sys/mman.h> -#include <testcases/mmap_test.h> +#include <mmap_test.h> #include "../../kselftest_harness.h" diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c index d1accb91b726..c63c60b9397e 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <sys/mman.h> -#include <testcases/mmap_test.h> +#include <mmap_test.h> #include "../../kselftest_harness.h" diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 9b8434f62f57..9b8434f62f57 100644 --- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh index ca5ad7c48bad..ca5ad7c48bad 100755 --- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh +++ b/tools/testing/selftests/riscv/mm/run_mmap.sh diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index bf951a490bb4..20403d58345c 100644 --- 
a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1231,7 +1231,7 @@ void *test_membarrier_worker_thread(void *arg) } /* Wait for initialization. */ - while (!atomic_load(&args->percpu_list_ptr)) {} + while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {} for (i = 0; i < iters; ++i) { int ret; @@ -1299,22 +1299,22 @@ void *test_membarrier_manager_thread(void *arg) test_membarrier_init_percpu_list(&list_a); test_membarrier_init_percpu_list(&list_b); - atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE); - while (!atomic_load(&args->stop)) { + while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) { /* list_a is "active". */ cpu_a = rand() % CPU_SETSIZE; /* * As list_b is "inactive", we should never see changes * to list_b. */ - if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { + if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) { fprintf(stderr, "Membarrier test failed\n"); abort(); } /* Make list_b "active". */ - atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); + __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE); if (rseq_membarrier_expedited(cpu_a) && errno != ENXIO /* missing CPU */) { perror("sys_membarrier"); @@ -1324,27 +1324,27 @@ void *test_membarrier_manager_thread(void *arg) * Cpu A should now only modify list_b, so the values * in list_a should be stable. */ - expect_a = atomic_load(&list_a.c[cpu_a].head->data); + expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE); cpu_b = rand() % CPU_SETSIZE; /* * As list_a is "inactive", we should never see changes * to list_a. */ - if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { + if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) { fprintf(stderr, "Membarrier test failed\n"); abort(); } /* Make list_a "active". 
*/ - atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE); if (rseq_membarrier_expedited(cpu_b) && errno != ENXIO /* missing CPU*/) { perror("sys_membarrier"); abort(); } /* Remember a value from list_b. */ - expect_b = atomic_load(&list_b.c[cpu_b].head->data); + expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE); } test_membarrier_free_percpu_list(&list_a); @@ -1401,7 +1401,7 @@ void test_membarrier(void) } } - atomic_store(&thread_args.stop, 1); + __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE); ret = pthread_join(manager_thread, NULL); if (ret) { errno = ret; diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index 98d37cb744fb..07227fab1cc9 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -111,7 +111,7 @@ int main(void) /* Make sure more than the required minimum. */ stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ; - ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size); + ksft_print_msg("[NOTE]\tthe stack size is %u\n", stack_size); ksft_print_header(); ksft_set_plan(3); diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh index fc9f8cde7d42..3b0f17b81ac2 100755 --- a/tools/testing/selftests/static_keys/test_static_keys.sh +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -6,18 +6,18 @@ ksft_skip=4 if ! /sbin/modprobe -q -n test_static_key_base; then - echo "static_key: module test_static_key_base is not found [SKIP]" + echo "static_keys: module test_static_key_base is not found [SKIP]" exit $ksft_skip fi if ! 
/sbin/modprobe -q -n test_static_keys; then - echo "static_key: module test_static_keys is not found [SKIP]" + echo "static_keys: module test_static_keys is not found [SKIP]" exit $ksft_skip fi if /sbin/modprobe -q test_static_key_base; then if /sbin/modprobe -q test_static_keys; then - echo "static_key: ok" + echo "static_keys: ok" /sbin/modprobe -q -r test_static_keys /sbin/modprobe -q -r test_static_key_base else @@ -25,6 +25,6 @@ if /sbin/modprobe -q test_static_key_base; then /sbin/modprobe -q -r test_static_key_base fi else - echo "static_key: [FAIL]" + echo "static_keys: [FAIL]" exit 1 fi diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index 3c4b7fa05075..e8b3dde4fa16 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,31 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../scripts/Makefile.include -top_srcdir = $(abspath ../../../..) -APIDIR := $(top_scrdir)/include/uapi -TEST_GEN_FILES = action.o +TEST_PROGS += ./tdc.sh +TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk - -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif - -CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') - -CLANG_FLAGS = -I. 
-I$(APIDIR) \ - $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types - -$(OUTPUT)/%.o: %.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 --target=bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ - -TEST_PROGS += ./tdc.sh -TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index be7b00799b3e..fc8e858ff119 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -195,8 +195,6 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) - - buildebpfPlugin.py: - builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf Binary files differnew file mode 100644 index 000000000000..4879479b2ee5 --- /dev/null +++ b/tools/testing/selftests/tc-testing/action-ebpf diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 5aa8705751f0..012aa33b341b 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -1,12 +1,21 @@ # +# Network +# + +CONFIG_DUMMY=y +CONFIG_VETH=y + +# # Core Netfilter Configuration # +CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_NF_NAT=m CONFIG_NETFILTER_XT_TARGET_LOG=m diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py deleted file mode 100644 index d34fe06268d2..000000000000 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ 
/dev/null @@ -1,67 +0,0 @@ -''' -build ebpf program -''' - -import os -import signal -from string import Template -import subprocess -import time -from TdcPlugin import TdcPlugin -from tdc_config import * - -class SubPlugin(TdcPlugin): - def __init__(self): - self.sub_class = 'buildebpf/SubPlugin' - self.tap = '' - super().__init__() - - def pre_suite(self, testcount, testidlist): - super().pre_suite(testcount, testidlist) - - if self.args.buildebpf: - self._ebpf_makeall() - - def post_suite(self, index): - super().post_suite(index) - - self._ebpf_makeclean() - - def add_args(self, parser): - super().add_args(parser) - - self.argparser_group = self.argparser.add_argument_group( - 'buildebpf', - 'options for buildebpfPlugin') - self.argparser_group.add_argument( - '--nobuildebpf', action='store_false', default=True, - dest='buildebpf', - help='Don\'t build eBPF programs') - - return self.argparser - - def _ebpf_makeall(self): - if self.args.buildebpf: - self._make('all') - - def _ebpf_makeclean(self): - if self.args.buildebpf: - self._make('clean') - - def _make(self, target): - command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) - proc = subprocess.Popen(command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ.copy()) - (rawout, serr) = proc.communicate() - - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") - - proc.stdout.close() - proc.stderr.close() - return proc, foutput diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index b62429b0fcdb..bb19b8b76d3b 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin from tdc_config import * -def prepare_suite(obj, test): - original = obj.args.NAMES - - if 'skip' in test and test['skip'] == 
'yes': - return - - if 'nsPlugin' not in test['plugins']: - return - - shadow = {} - shadow['IP'] = original['IP'] - shadow['TC'] = original['TC'] - shadow['NS'] = '{}-{}'.format(original['NS'], test['random']) - shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id']) - shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id']) - shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id']) - shadow['DEV2'] = original['DEV2'] - obj.args.NAMES = shadow - - if obj.args.namespace: - obj._ns_create() - else: - obj._ports_create() - - # Make sure the netns is visible in the fs - while True: - obj._proc_check() - try: - ns = obj.args.NAMES['NS'] - f = open('/run/netns/{}'.format(ns)) - f.close() - break - except: - time.sleep(0.1) - continue - - obj.args.NAMES = original +try: + from pyroute2 import netns + from pyroute2 import IPRoute + netlink = True +except ImportError: + netlink = False + print("!!! Consider installing pyroute2 !!!") class SubPlugin(TdcPlugin): def __init__(self): @@ -53,64 +23,71 @@ class SubPlugin(TdcPlugin): super().__init__() def pre_suite(self, testcount, testlist): - from itertools import cycle - super().pre_suite(testcount, testlist) - print("Setting up namespaces and devices...") + def prepare_test(self, test): + if 'skip' in test and test['skip'] == 'yes': + return - with Pool(self.args.mp) as p: - it = zip(cycle([self]), testlist) - p.starmap(prepare_suite, it) + if 'nsPlugin' not in test['plugins']: + return - def pre_case(self, caseinfo, test_skip): + if netlink == True: + self._nl_ns_create() + else: + self._ipr2_ns_create() + + # Make sure the netns is visible in the fs + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + self._proc_check() + try: + ns = self.args.NAMES['NS'] + f = open('/run/netns/{}'.format(ns)) + f.close() + break + except: + time.sleep(0.1) + ticks -= 1 + continue + + def pre_case(self, test, test_skip): if self.args.verbose: print('{}.pre_case'.format(self.sub_class)) if test_skip: 
return + self.prepare_test(test) def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - if self.args.namespace: - self._ns_destroy() + if netlink == True: + self._nl_ns_destroy() else: - self._ports_destroy() + self._ipr2_ns_destroy() def post_suite(self, index): if self.args.verbose: print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - for f in os.listdir('/run/netns/'): - cmd = self._replace_keywords("$IP netns del {}".format(f)) + cmd = self._replace_keywords("$IP -a netns del") - if self.args.verbose > 3: - print('_exec_cmd: command "{}"'.format(cmd)) - - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + if self.args.verbose > 3: + print('_exec_cmd: command "{}"'.format(cmd)) - def add_args(self, parser): - super().add_args(parser) - self.argparser_group = self.argparser.add_argument_group( - 'netns', - 'options for nsPlugin(run commands in net namespace)') - self.argparser_group.add_argument( - '-N', '--no-namespace', action='store_false', default=True, - dest='namespace', help='Don\'t run commands in namespace') - return self.argparser + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) def adjust_command(self, stage, command): super().adjust_command(stage, command) cmdform = 'list' cmdlist = list() - if not self.args.namespace: - return command - if self.args.verbose: print('{}.adjust_command'.format(self.sub_class)) @@ -138,63 +115,90 @@ class SubPlugin(TdcPlugin): print('adjust_command: return command [{}]'.format(command)) return command - def _ports_create_cmds(self): - cmds = [] - - cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) - cmds.append(self._replace_keywords('link set $DEV0 up')) - cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - if not self.args.namespace: - cmds.append(self._replace_keywords('link set $DEV1 up')) - - return cmds - - def 
_ports_create(self): - self._exec_cmd_batched('pre', self._ports_create_cmds()) + def _nl_ns_create(self): + ns = self.args.NAMES["NS"]; + dev0 = self.args.NAMES["DEV0"]; + dev1 = self.args.NAMES["DEV1"]; + dummy = self.args.NAMES["DUMMY"]; - def _ports_destroy_cmd(self): - return self._replace_keywords('link del $DEV0') - - def _ports_destroy(self): - self._exec_cmd('post', self._ports_destroy_cmd()) - - def _ns_create_cmds(self): + if self.args.verbose: + print('{}._nl_ns_create'.format(self.sub_class)) + + netns.create(ns) + netns.pushns(newns=ns) + with IPRoute() as ip: + ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) + ip.link('add', ifname=dummy, kind='dummy') + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + try: + dev1_idx = ip.link_lookup(ifname=dev1)[0] + dummy_idx = ip.link_lookup(ifname=dummy)[0] + ip.link('set', index=dev1_idx, state='up') + ip.link('set', index=dummy_idx, state='up') + break + except: + time.sleep(0.1) + ticks -= 1 + continue + netns.popns() + + with IPRoute() as ip: + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + try: + dev0_idx = ip.link_lookup(ifname=dev0)[0] + ip.link('set', index=dev0_idx, state='up') + break + except: + time.sleep(0.1) + ticks -= 1 + continue + + def _ipr2_ns_create_cmds(self): cmds = [] - if self.args.namespace: - ns = self.args.NAMES['NS'] + ns = self.args.NAMES['NS'] - cmds.append(self._replace_keywords('netns add {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0')) + 
cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns))) + cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns))) - if self.args.device: - cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) + if self.args.device: + cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) return cmds - def _ns_create(self): + def _ipr2_ns_create(self): ''' Create the network namespace in which the tests will be run and set up the required network devices for it. 
''' - self._ports_create() - self._exec_cmd_batched('pre', self._ns_create_cmds()) + self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds()) + + def _nl_ns_destroy(self): + ns = self.args.NAMES['NS'] + netns.remove(ns) - def _ns_destroy_cmd(self): + def _ipr2_ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) - def _ns_destroy(self): + def _ipr2_ns_destroy(self): ''' Destroy the network namespace for testing (and any associated network devices as well) ''' - if self.args.namespace: - self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() + self._exec_cmd('post', self._ipr2_ns_destroy_cmd()) @cached_property def _proc(self): diff --git a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh index f5335e8ad6b4..f5335e8ad6b4 100755 --- a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh +++ b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 91832400ddbd..6e00bf32ef9a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -54,9 +54,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -65,10 +62,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? 
default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" @@ -81,9 +78,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -92,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json index 013fb983bc3f..725d406a30ac 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json @@ -52,17 +52,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 
action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" @@ -77,17 +76,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko", "expExitCode": "1", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "0", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index ddc7c355be0a..24bd0c2a3014 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -272,5 +272,62 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "bd32", + "name": "Try to delete hashtable referenced by another u32 filter", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": 
"protocol ip pref 2 u32 chain 0 fh 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "4585", + "name": "Delete small tree of u32 hashtables and filters", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json index 0ddb8e1b4369..c98c339424d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json @@ -9,8 +9,7 @@ "plugins": { 
"requires": "nsPlugin" }, - "setup": [ - ], + "setup": [], "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", @@ -126,8 +125,7 @@ "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+", "matchCount": "0", - "teardown": [ - ] + "teardown": [] }, { "id": "8436", @@ -139,8 +137,7 @@ "plugins": { "requires": "nsPlugin" }, - "setup": [ - ], + "setup": [], "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc", "expExitCode": "0", "verifyCmd": "$TC class show dev $DUMMY", @@ -149,5 +146,28 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "bef4", + "name": "HFSC rt inner class upgrade to sc", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle 1: root hfsc default 1", + "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 8" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent 1:1 classid 1:2 hfsc rt rate 8", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 8bit.*rt m1 0bit d 0us m2 8bit", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 0599635c4bc6..2d603ef2e375 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -170,11 +170,11 @@ "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", - "./taprio_wait_for_admin.sh $TC $ETH" + "./scripts/taprio_wait_for_admin.sh $TC 
$ETH" ], "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI", "expExitCode": "2", - "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", "matchPattern": "0", "matchCount": "1", "teardown": [ @@ -195,11 +195,11 @@ "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2", - "./taprio_wait_for_admin.sh $TC $ETH" + "./scripts/taprio_wait_for_admin.sh $TC $ETH" ], "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2", "expExitCode": "2", - "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", "matchPattern": "0", "matchCount": "1", "teardown": [ diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a6718192aff3..caeacc691587 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist): pm.call_post_suite(1) return emergency_exit_message - if args.verbose: - print('give test rig 2 seconds to stabilize') - - time.sleep(2) - def purge_run(pm, index): pm.call_post_suite(index) @@ -616,7 +611,7 @@ def test_runner_mp(pm, args, alltests): 
batches.insert(0, serial) print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial))) - print("Using {} batches".format(len(batches))) + print("Using {} batches and {} workers".format(len(batches), args.mp)) # We can't pickle these objects so workaround them global mp_pm @@ -1017,12 +1012,17 @@ def main(): parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() args.NAMES = NAMES + args.mp = min(args.mp, 4) pm.set_args(args) check_default_settings(args, remaining, pm) if args.verbose > 2: print('args is {}'.format(args)) - set_operation_mode(pm, parser, args, remaining) + try: + set_operation_mode(pm, parser, args, remaining) + except KeyboardInterrupt: + # Cleanup on Ctrl-C + pm.call_post_suite(None) if __name__ == "__main__": main() diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index eb357bd7923c..407fa53822a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,7 +1,68 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -modprobe netdevsim -modprobe sch_teql -./tdc.py -c actions --nobuildebpf -./tdc.py -c qdisc +# If a module is required and was not compiled +# the test that requires it will fail anyways +try_modprobe() { + modprobe -q -R "$1" + if [ $? -ne 0 ]; then + echo "Module $1 not found... skipping." 
+ else + modprobe "$1" + fi +} + +try_modprobe netdevsim +try_modprobe act_bpf +try_modprobe act_connmark +try_modprobe act_csum +try_modprobe act_ct +try_modprobe act_ctinfo +try_modprobe act_gact +try_modprobe act_gate +try_modprobe act_ipt +try_modprobe act_mirred +try_modprobe act_mpls +try_modprobe act_nat +try_modprobe act_pedit +try_modprobe act_police +try_modprobe act_sample +try_modprobe act_simple +try_modprobe act_skbedit +try_modprobe act_skbmod +try_modprobe act_tunnel_key +try_modprobe act_vlan +try_modprobe cls_basic +try_modprobe cls_bpf +try_modprobe cls_cgroup +try_modprobe cls_flow +try_modprobe cls_flower +try_modprobe cls_fw +try_modprobe cls_matchall +try_modprobe cls_route +try_modprobe cls_u32 +try_modprobe em_canid +try_modprobe em_cmp +try_modprobe em_ipset +try_modprobe em_ipt +try_modprobe em_meta +try_modprobe em_nbyte +try_modprobe em_text +try_modprobe em_u32 +try_modprobe sch_cake +try_modprobe sch_cbs +try_modprobe sch_choke +try_modprobe sch_codel +try_modprobe sch_drr +try_modprobe sch_etf +try_modprobe sch_ets +try_modprobe sch_fq +try_modprobe sch_fq_codel +try_modprobe sch_fq_pie +try_modprobe sch_gred +try_modprobe sch_hfsc +try_modprobe sch_hhf +try_modprobe sch_htb +try_modprobe sch_teql +./tdc.py -J`nproc` -c actions +./tdc.py -J`nproc` -c qdisc diff --git a/tools/testing/selftests/tdx/.gitignore b/tools/testing/selftests/tdx/.gitignore new file mode 100644 index 000000000000..5db4d15cc673 --- /dev/null +++ b/tools/testing/selftests/tdx/.gitignore @@ -0,0 +1 @@ +tdx_guest_test diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile new file mode 100644 index 000000000000..9b88e57dbba5 --- /dev/null +++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e 
s/x86_64/x86/) + +ifeq ($(ARCH),x86) +TEST_GEN_PROGS := power_floor_test + +include ../../../lib.mk + +endif +endif diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c new file mode 100644 index 000000000000..0326b39a11b9 --- /dev/null +++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> + +#define POWER_FLOOR_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_enable" +#define POWER_FLOOR_STATUS_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_status" + +void power_floor_exit(int signum) +{ + int fd; + + /* Disable feature via sysfs knob */ + + fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR); + if (fd < 0) { + perror("Unable to open power floor enable file\n"); + exit(1); + } + + if (write(fd, "0\n", 2) < 0) { + perror("Can' disable power floor notifications\n"); + exit(1); + } + + printf("Disabled power floor notifications\n"); + + close(fd); +} + +int main(int argc, char **argv) +{ + struct pollfd ufd; + char status_str[3]; + int fd, ret; + + if (signal(SIGINT, power_floor_exit) == SIG_IGN) + signal(SIGINT, SIG_IGN); + if (signal(SIGHUP, power_floor_exit) == SIG_IGN) + signal(SIGHUP, SIG_IGN); + if (signal(SIGTERM, power_floor_exit) == SIG_IGN) + signal(SIGTERM, SIG_IGN); + + /* Enable feature via sysfs knob */ + fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR); + if (fd < 0) { + perror("Unable to open power floor enable file\n"); + exit(1); + } + + if (write(fd, "1\n", 2) < 0) { + perror("Can' enable power floor notifications\n"); + exit(1); + } + + close(fd); + + printf("Enabled power floor notifications\n"); + + while (1) { + fd = open(POWER_FLOOR_STATUS_ATTRIBUTE, O_RDONLY); 
+ if (fd < 0) { + perror("Unable to power floor status file\n"); + exit(1); + } + + if ((lseek(fd, 0L, SEEK_SET)) < 0) { + fprintf(stderr, "Failed to set pointer to beginning\n"); + exit(1); + } + + if (read(fd, status_str, sizeof(status_str)) < 0) { + fprintf(stderr, "Failed to read from:%s\n", + POWER_FLOOR_STATUS_ATTRIBUTE); + exit(1); + } + + ufd.fd = fd; + ufd.events = POLLPRI; + + ret = poll(&ufd, 1, -1); + if (ret < 0) { + perror("poll error"); + exit(1); + } else if (ret == 0) { + printf("Poll Timeout\n"); + } else { + if ((lseek(fd, 0L, SEEK_SET)) < 0) { + fprintf(stderr, "Failed to set pointer to beginning\n"); + exit(1); + } + + if (read(fd, status_str, sizeof(status_str)) < 0) + exit(0); + + printf("power floor status: %s\n", status_str); + } + + close(fd); + } +} diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile new file mode 100644 index 000000000000..37ff3286283b --- /dev/null +++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef CROSS_COMPILE +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) +TEST_GEN_PROGS := workload_hint_test + +include ../../../lib.mk + +endif +endif diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c new file mode 100644 index 000000000000..217c3a641c53 --- /dev/null +++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> + +#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE 
"/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms" +#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable" +#define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index" + +static const char * const workload_types[] = { + "idle", + "battery_life", + "sustained", + "bursty", + NULL +}; + +#define WORKLOAD_TYPE_MAX_INDEX 3 + +void workload_hint_exit(int signum) +{ + int fd; + + /* Disable feature via sysfs knob */ + + fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); + if (fd < 0) { + perror("Unable to open workload type feature enable file\n"); + exit(1); + } + + if (write(fd, "0\n", 2) < 0) { + perror("Can' disable workload hints\n"); + exit(1); + } + + printf("Disabled workload type prediction\n"); + + close(fd); +} + +int main(int argc, char **argv) +{ + struct pollfd ufd; + char index_str[4]; + int fd, ret, index; + char delay_str[64]; + int delay = 0; + + printf("Usage: workload_hint_test [notification delay in milli seconds]\n"); + + if (argc > 1) { + ret = sscanf(argv[1], "%d", &delay); + if (ret < 0) { + printf("Invalid delay\n"); + exit(1); + } + + printf("Setting notification delay to %d ms\n", delay); + if (delay < 0) + exit(1); + + sprintf(delay_str, "%s\n", argv[1]); + + sprintf(delay_str, "%s\n", argv[1]); + fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR); + if (fd < 0) { + perror("Unable to open workload notification delay\n"); + exit(1); + } + + if (write(fd, delay_str, strlen(delay_str)) < 0) { + perror("Can't set delay\n"); + exit(1); + } + + close(fd); + } + + if (signal(SIGINT, workload_hint_exit) == SIG_IGN) + signal(SIGINT, SIG_IGN); + if (signal(SIGHUP, workload_hint_exit) == SIG_IGN) + signal(SIGHUP, SIG_IGN); + if (signal(SIGTERM, workload_hint_exit) == SIG_IGN) + signal(SIGTERM, SIG_IGN); + + /* Enable feature via sysfs knob */ + fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR); + if (fd < 0) { + perror("Unable to open 
workload type feature enable file\n"); + exit(1); + } + + if (write(fd, "1\n", 2) < 0) { + perror("Can' enable workload hints\n"); + exit(1); + } + + close(fd); + + printf("Enabled workload type prediction\n"); + + while (1) { + fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY); + if (fd < 0) { + perror("Unable to open workload type file\n"); + exit(1); + } + + if ((lseek(fd, 0L, SEEK_SET)) < 0) { + fprintf(stderr, "Failed to set pointer to beginning\n"); + exit(1); + } + + if (read(fd, index_str, sizeof(index_str)) < 0) { + fprintf(stderr, "Failed to read from:%s\n", + WORKLOAD_TYPE_INDEX_ATTRIBUTE); + exit(1); + } + + ufd.fd = fd; + ufd.events = POLLPRI; + + ret = poll(&ufd, 1, -1); + if (ret < 0) { + perror("poll error"); + exit(1); + } else if (ret == 0) { + printf("Poll Timeout\n"); + } else { + if ((lseek(fd, 0L, SEEK_SET)) < 0) { + fprintf(stderr, "Failed to set pointer to beginning\n"); + exit(1); + } + + if (read(fd, index_str, sizeof(index_str)) < 0) + exit(0); + + ret = sscanf(index_str, "%d", &index); + if (ret < 0) + break; + if (index > WORKLOAD_TYPE_MAX_INDEX) + printf("Invalid workload type index\n"); + else + printf("workload type:%s\n", workload_types[index]); + } + + close(fd); + } +} diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index eb3e79ed7b4a..edb5acacf214 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -118,7 +118,7 @@ int nanosleep_lat_test(int clockid, long long ns) clock_gettime(clockid, &end); if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) { - printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); + ksft_print_msg("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); return -1; } @@ -132,20 +132,23 @@ int nanosleep_lat_test(int clockid, long long ns) } if (latency/count > UNRESONABLE_LATENCY) { - printf("Large abs latency: %lld ns :", latency/count); + 
ksft_print_msg("Large abs latency: %lld ns :", latency/count); return -1; } return 0; } - +#define SKIPPED_CLOCK_COUNT 3 int main(int argc, char **argv) { long long length; int clockid, ret; + ksft_print_header(); + ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT); + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ @@ -154,9 +157,6 @@ int main(int argc, char **argv) clockid == CLOCK_HWSPECIFIC) continue; - printf("nsleep latency %-26s ", clockstring(clockid)); - fflush(stdout); - length = 10; while (length <= (NSEC_PER_SEC * 10)) { ret = nanosleep_lat_test(clockid, length); @@ -167,14 +167,12 @@ int main(int argc, char **argv) } if (ret == UNSUPPORTED) { - printf("[UNSUPPORTED]\n"); - continue; - } - if (ret < 0) { - printf("[FAILED]\n"); - return ksft_exit_fail(); + ksft_test_result_skip("%s\n", clockstring(clockid)); + } else { + ksft_test_result(ret >= 0, "%s\n", + clockstring(clockid)); } - printf("[OK]\n"); } - return ksft_exit_pass(); + + ksft_finished(); } diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 8a17c0e8d82b..d49dd3ffd0d9 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -76,22 +76,21 @@ static int check_diff(struct timeval start, struct timeval end) static int check_itimer(int which) { + const char *name; int err; struct timeval start, end; struct itimerval val = { .it_value.tv_sec = DELAY, }; - printf("Check itimer "); - if (which == ITIMER_VIRTUAL) - printf("virtual... "); + name = "ITIMER_VIRTUAL"; else if (which == ITIMER_PROF) - printf("prof... "); + name = "ITIMER_PROF"; else if (which == ITIMER_REAL) - printf("real... 
"); - - fflush(stdout); + name = "ITIMER_REAL"; + else + return -1; done = 0; @@ -104,13 +103,13 @@ static int check_itimer(int which) err = gettimeofday(&start, NULL); if (err < 0) { - perror("Can't call gettimeofday()\n"); + ksft_perror("Can't call gettimeofday()"); return -1; } err = setitimer(which, &val, NULL); if (err < 0) { - perror("Can't set timer\n"); + ksft_perror("Can't set timer"); return -1; } @@ -123,20 +122,18 @@ static int check_itimer(int which) err = gettimeofday(&end, NULL); if (err < 0) { - perror("Can't call gettimeofday()\n"); + ksft_perror("Can't call gettimeofday()"); return -1; } - if (!check_diff(start, end)) - printf("[OK]\n"); - else - printf("[FAIL]\n"); + ksft_test_result(check_diff(start, end) == 0, "%s\n", name); return 0; } static int check_timer_create(int which) { + const char *type; int err; timer_t id; struct timeval start, end; @@ -144,31 +141,32 @@ static int check_timer_create(int which) .it_value.tv_sec = DELAY, }; - printf("Check timer_create() "); if (which == CLOCK_THREAD_CPUTIME_ID) { - printf("per thread... "); + type = "thread"; } else if (which == CLOCK_PROCESS_CPUTIME_ID) { - printf("per process... 
"); + type = "process"; + } else { + ksft_print_msg("Unknown timer_create() type %d\n", which); + return -1; } - fflush(stdout); done = 0; err = timer_create(which, NULL, &id); if (err < 0) { - perror("Can't create timer\n"); + ksft_perror("Can't create timer"); return -1; } signal(SIGALRM, sig_handler); err = gettimeofday(&start, NULL); if (err < 0) { - perror("Can't call gettimeofday()\n"); + ksft_perror("Can't call gettimeofday()"); return -1; } err = timer_settime(id, 0, &val, NULL); if (err < 0) { - perror("Can't set timer\n"); + ksft_perror("Can't set timer"); return -1; } @@ -176,14 +174,12 @@ static int check_timer_create(int which) err = gettimeofday(&end, NULL); if (err < 0) { - perror("Can't call gettimeofday()\n"); + ksft_perror("Can't call gettimeofday()"); return -1; } - if (!check_diff(start, end)) - printf("[OK]\n"); - else - printf("[FAIL]\n"); + ksft_test_result(check_diff(start, end) == 0, + "timer_create() per %s\n", type); return 0; } @@ -220,25 +216,25 @@ static int check_timer_distribution(void) .it_interval.tv_nsec = 1000 * 1000, }; - printf("Check timer_create() per process signal distribution... 
"); - fflush(stdout); - remain = nthreads + 1; /* worker threads + this thread */ signal(SIGALRM, distribution_handler); err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id); if (err < 0) { - perror("Can't create timer\n"); + ksft_perror("Can't create timer"); return -1; } err = timer_settime(id, 0, &val, NULL); if (err < 0) { - perror("Can't set timer\n"); + ksft_perror("Can't set timer"); return -1; } for (i = 0; i < nthreads; i++) { - if (pthread_create(&threads[i], NULL, distribution_thread, NULL)) { - perror("Can't create thread\n"); + err = pthread_create(&threads[i], NULL, distribution_thread, + NULL); + if (err) { + ksft_print_msg("Can't create thread: %s (%d)\n", + strerror(errno), errno); return -1; } } @@ -247,25 +243,30 @@ static int check_timer_distribution(void) while (__atomic_load_n(&remain, __ATOMIC_RELAXED)); for (i = 0; i < nthreads; i++) { - if (pthread_join(threads[i], NULL)) { - perror("Can't join thread\n"); + err = pthread_join(threads[i], NULL); + if (err) { + ksft_print_msg("Can't join thread: %s (%d)\n", + strerror(errno), errno); return -1; } } if (timer_delete(id)) { - perror("Can't delete timer\n"); + ksft_perror("Can't delete timer"); return -1; } - printf("[OK]\n"); + ksft_test_result_pass("check_timer_distribution\n"); return 0; } int main(int argc, char **argv) { - printf("Testing posix timers. False negative may happen on CPU execution \n"); - printf("based timers if other threads run on the CPU...\n"); + ksft_print_header(); + ksft_set_plan(6); + + ksft_print_msg("Testing posix timers. 
False negative may happen on CPU execution \n"); + ksft_print_msg("based timers if other threads run on the CPU...\n"); if (check_itimer(ITIMER_VIRTUAL) < 0) return ksft_exit_fail(); @@ -294,5 +295,5 @@ int main(int argc, char **argv) if (check_timer_distribution() < 0) return ksft_exit_fail(); - return ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c index 5cebfb356345..dbe55f3a66f4 100644 --- a/tools/testing/selftests/uevent/uevent_filtering.c +++ b/tools/testing/selftests/uevent/uevent_filtering.c @@ -78,7 +78,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent, { int sk_fd, ret; socklen_t sk_addr_len; - int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE; + int rcv_buf_sz = __UEVENT_BUFFER_SIZE; uint64_t sync_add = 1; struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 }; char buf[__UEVENT_BUFFER_SIZE] = { 0 }; @@ -121,6 +121,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent, if ((size_t)sk_addr_len != sizeof(sk_addr)) { fprintf(stderr, "Invalid socket address size\n"); + ret = -1; goto on_error; } @@ -147,11 +148,12 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent, ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add)); close(sync_fd); if (ret != sizeof(sync_add)) { + ret = -1; fprintf(stderr, "Failed to synchronize with parent process\n"); goto on_error; } - fret = 0; + ret = 0; for (;;) { ssize_t r; @@ -187,7 +189,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent, on_error: close(sk_fd); - return fret; + return ret; } int trigger_uevent(unsigned int times) diff --git a/tools/testing/selftests/user_events/.gitignore b/tools/testing/selftests/user_events/.gitignore new file mode 100644 index 000000000000..f570febd211b --- /dev/null +++ b/tools/testing/selftests/user_events/.gitignore @@ -0,0 +1,4 @@ +abi_test +dyn_test +ftrace_test +perf_test diff --git 
a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 8202f1327c39..cef1ff1af223 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -24,6 +24,18 @@ const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable"; +static bool event_exists(void) +{ + int fd = open(enable_file, O_RDWR); + + if (fd < 0) + return false; + + close(fd); + + return true; +} + static int change_event(bool enable) { int fd = open(enable_file, O_RDWR); @@ -47,7 +59,22 @@ static int change_event(bool enable) return ret; } -static int reg_enable(long *enable, int size, int bit) +static int event_delete(void) +{ + int fd = open(data_file, O_RDWR); + int ret; + + if (fd < 0) + return -1; + + ret = ioctl(fd, DIAG_IOCSDEL, "__abi_event"); + + close(fd); + + return ret; +} + +static int reg_enable_flags(void *enable, int size, int bit, int flags) { struct user_reg reg = {0}; int fd = open(data_file, O_RDWR); @@ -58,6 +85,7 @@ static int reg_enable(long *enable, int size, int bit) reg.size = sizeof(reg); reg.name_args = (__u64)"__abi_event"; + reg.flags = flags; reg.enable_bit = bit; reg.enable_addr = (__u64)enable; reg.enable_size = size; @@ -69,7 +97,12 @@ static int reg_enable(long *enable, int size, int bit) return ret; } -static int reg_disable(long *enable, int bit) +static int reg_enable(void *enable, int size, int bit) +{ + return reg_enable_flags(enable, size, bit, 0); +} + +static int reg_disable(void *enable, int bit) { struct user_unreg reg = {0}; int fd = open(data_file, O_RDWR); @@ -90,7 +123,8 @@ static int reg_disable(long *enable, int bit) } FIXTURE(user) { - long check; + int check; + long check_long; bool umount; }; @@ -99,6 +133,7 @@ FIXTURE_SETUP(user) { change_event(false); self->check = 0; + self->check_long = 0; } FIXTURE_TEARDOWN(user) { @@ -126,6 +161,26 @@ TEST_F(user, 
enablement) { ASSERT_EQ(0, change_event(false)); } +TEST_F(user, flags) { + /* USER_EVENT_REG_PERSIST is allowed */ + ASSERT_EQ(0, reg_enable_flags(&self->check, sizeof(int), 0, + USER_EVENT_REG_PERSIST)); + ASSERT_EQ(0, reg_disable(&self->check, 0)); + + /* Ensure it exists after close and disable */ + ASSERT_TRUE(event_exists()); + + /* Ensure we can delete it */ + ASSERT_EQ(0, event_delete()); + + /* USER_EVENT_REG_MAX or above is not allowed */ + ASSERT_EQ(-1, reg_enable_flags(&self->check, sizeof(int), 0, + USER_EVENT_REG_MAX)); + + /* Ensure it does not exist after invalid flags */ + ASSERT_FALSE(event_exists()); +} + TEST_F(user, bit_sizes) { /* Allow 0-31 bits for 32-bit */ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0)); @@ -136,9 +191,9 @@ TEST_F(user, bit_sizes) { #if BITS_PER_LONG == 8 /* Allow 0-64 bits for 64-bit */ - ASSERT_EQ(0, reg_enable(&self->check, sizeof(long), 63)); - ASSERT_NE(0, reg_enable(&self->check, sizeof(long), 64)); - ASSERT_EQ(0, reg_disable(&self->check, 63)); + ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63)); + ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64)); + ASSERT_EQ(0, reg_disable(&self->check_long, 63)); #endif /* Disallowed sizes (everything beside 4 and 8) */ @@ -200,7 +255,7 @@ static int clone_check(void *check) for (i = 0; i < 10; ++i) { usleep(100000); - if (*(long *)check) + if (*(int *)check) return 0; } diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index a85980190bea..bdf9ab127488 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -17,9 +17,25 @@ #include "../kselftest_harness.h" #include "user_events_selftests.h" +const char *dyn_file = "/sys/kernel/tracing/dynamic_events"; const char *abi_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable"; +static int event_delete(void) +{ 
+ int fd = open(abi_file, O_RDWR); + int ret; + + if (fd < 0) + return -1; + + ret = ioctl(fd, DIAG_IOCSDEL, "__test_event"); + + close(fd); + + return ret; +} + static bool wait_for_delete(void) { int i; @@ -64,7 +80,31 @@ static int unreg_event(int fd, int *check, int bit) return ioctl(fd, DIAG_IOCSUNREG, &unreg); } -static int parse(int *check, const char *value) +static int parse_dyn(const char *value) +{ + int fd = open(dyn_file, O_RDWR | O_APPEND); + int len = strlen(value); + int ret; + + if (fd == -1) + return -1; + + ret = write(fd, value, len); + + if (ret == len) + ret = 0; + else + ret = -1; + + close(fd); + + if (ret == 0) + event_delete(); + + return ret; +} + +static int parse_abi(int *check, const char *value) { int fd = open(abi_file, O_RDWR); int ret; @@ -90,6 +130,18 @@ static int parse(int *check, const char *value) return ret; } +static int parse(int *check, const char *value) +{ + int abi_ret = parse_abi(check, value); + int dyn_ret = parse_dyn(value); + + /* Ensure both ABI and DYN parse the same way */ + if (dyn_ret != abi_ret) + return -1; + + return dyn_ret; +} + static int check_match(int *check, const char *first, const char *second, bool *match) { int fd = open(abi_file, O_RDWR); diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 7e8c937627dd..0b872c0a42d2 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -14,6 +14,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \ syscall_arg_fault fsgsbase_restore sigaltstack +TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer @@ -109,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S # state. 
$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static + +$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack +$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index eb0e46905bf9..8f9b06d9ce03 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -573,7 +573,7 @@ int do_uring(unsigned long lam) char path[PATH_MAX] = {0}; /* get current process path */ - if (readlink("/proc/self/exe", path, PATH_MAX) <= 0) + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) return 1; int file_fd = open(path, O_RDONLY); @@ -680,14 +680,14 @@ static int handle_execve(struct testcases *test) perror("Fork failed."); ret = 1; } else if (pid == 0) { - char path[PATH_MAX]; + char path[PATH_MAX] = {0}; /* Set LAM mode in parent process */ if (set_lam(lam) != 0) return 1; /* Get current binary's path and the binary was run by execve */ - if (readlink("/proc/self/exe", path, PATH_MAX) <= 0) + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) exit(-1); /* run binary to get LAM mode and return to parent process */ diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c new file mode 100644 index 000000000000..ea4a4e246879 --- /dev/null +++ b/tools/testing/selftests/x86/nx_stack.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack + * because I don't want to bother with PT_GNU_STACK detection. + * + * Fill the stack with INT3's and then try to execute some of them: + * SIGSEGV -- good, SIGTRAP -- bad. + * + * Regular stack is completely overwritten before testing. + * Test doesn't exit SIGSEGV handler after first fault at INT3. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> + +#define PAGE_SIZE 4096 + +/* + * This is memset(rsp, 0xcc, -1); but down. + * It will SIGSEGV when bottom of the stack is reached. + * Byte-size access is important! (see rdi tweak in the signal handler). + */ +void make_stack1(void); +asm( +".pushsection .text\n" +".globl make_stack1\n" +".align 16\n" +"make_stack1:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov %rsp, %rdi\n" + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov %esp, %edi\n" + "mov $-1, %ecx\n" +#else +#error +#endif + "std\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack1,@function\n" +".size make_stack1,.-make_stack1\n" +".popsection\n" +); + +/* + * memset(p, 0xcc, -1); + * It will SIGSEGV when top of the stack is reached. 
+ */ +void make_stack2(uint64_t p); +asm( +".pushsection .text\n" +".globl make_stack2\n" +".align 16\n" +"make_stack2:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov $-1, %ecx\n" +#else +#error +#endif + "cld\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack2,@function\n" +".size make_stack2,.-make_stack2\n" +".popsection\n" +); + +static volatile int test_state = 0; +static volatile unsigned long stack_min_addr; + +#if defined __amd64__ +#define RDI REG_RDI +#define RIP REG_RIP +#define RIP_STRING "rip" +#elif defined __i386__ +#define RDI REG_EDI +#define RIP REG_EIP +#define RIP_STRING "eip" +#else +#error +#endif + +static void sigsegv(int _, siginfo_t *__, void *uc_) +{ + /* + * Some Linux versions didn't clear DF before entering signal + * handler. make_stack1() doesn't have a chance to clear DF + * either so we clear it by hand here. + */ + asm volatile ("cld" ::: "memory"); + + ucontext_t *uc = uc_; + + if (test_state == 0) { + /* Stack is faulted and cleared from RSP to the lowest address. */ + stack_min_addr = ++uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack min %lx\n", stack_min_addr); + } + uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2; + test_state = 1; + } else if (test_state == 1) { + /* Stack has been cleared from top to bottom. */ + unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack max %lx\n", stack_max_addr); + } + /* Start faulting pages on stack and see what happens. */ + uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE; + test_state = 2; + } else if (test_state == 2) { + /* Stack page is NX -- good, test next page. */ + uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE; + if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) { + /* One more SIGSEGV and test ends. 
*/ + test_state = 3; + } + } else { + printf("PASS\tAll stack pages are NX\n"); + _exit(EXIT_SUCCESS); + } +} + +static void sigtrap(int _, siginfo_t *__, void *uc_) +{ + const ucontext_t *uc = uc_; + unsigned long rip = uc->uc_mcontext.gregs[RIP]; + printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip); + _exit(EXIT_FAILURE); +} + +int main(void) +{ + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigsegv; + int rv = sigaction(SIGSEGV, &act, NULL); + assert(rv == 0); + } + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigtrap; + int rv = sigaction(SIGTRAP, &act, NULL); + assert(rv == 0); + } + { + struct rlimit rlim; + int rv = getrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + /* Cap stack at time-honored 8 MiB value. */ + rlim.rlim_max = rlim.rlim_cur; + if (rlim.rlim_max > 8 * 1024 * 1024) { + rlim.rlim_max = 8 * 1024 * 1024; + } + rv = setrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + } + { + /* + * We don't know now much stack SIGSEGV handler uses. + * Bump this by 1 page every time someone complains, + * or rewrite it in assembly. + */ + const size_t len = SIGSTKSZ; + void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert(p != MAP_FAILED); + stack_t ss = {}; + ss.ss_sp = p; + ss.ss_size = len; + int rv = sigaltstack(&ss, NULL); + assert(rv == 0); + } + make_stack1(); + /* + * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere. + * Fold it into main SIGTRAP pathway. + */ + __builtin_trap(); +} |