280 files changed, 17274 insertions, 2914 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1a21d6beebc6..3b2061d1c1a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += drivers/dma-buf
 TARGETS += drivers/s390x/uvdevice
 TARGETS += drivers/net/bonding
 TARGETS += drivers/net/team
+TARGETS += dt
 TARGETS += efivarfs
 TARGETS += exec
 TARGETS += fchmodat2
@@ -85,6 +86,8 @@ TARGETS += syscall_user_dispatch
 TARGETS += sysctl
 TARGETS += tc-testing
 TARGETS += tdx
+TARGETS += thermal/intel/power_floor
+TARGETS += thermal/intel/workload_hint
 TARGETS += timens
 ifneq (1, $(quicktest))
 TARGETS += timers
diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h
index de030dc23bd1..29143ef52101 100644
--- a/tools/testing/selftests/alsa/alsa-local.h
+++ b/tools/testing/selftests/alsa/alsa-local.h
@@ -24,4 +24,14 @@ int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int de
 void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
 			   const char **array, int array_size, const char *def);
 
+struct card_cfg_data {
+	int card;
+	snd_config_t *config;
+	const char *filename;
+	const char *config_id;
+	struct card_cfg_data *next;
+};
+
+extern struct card_cfg_data *conf_cards;
+
 #endif /* __ALSA_LOCAL_H */
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index 2f1685a3eae1..00925eb8d9f4 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -19,14 +19,7 @@
 
 #define SYSFS_ROOT "/sys"
 
-struct card_data {
-	int card;
-	snd_config_t *config;
-	const char *filename;
-	struct card_data *next;
-};
-
-static struct card_data *conf_cards;
+struct card_cfg_data *conf_cards;
 
 static const char *alsa_config =
 "ctl.hw {\n"
@@ -97,9 +90,9 @@ snd_config_t *get_alsalib_config(void)
 	return config;
 }
 
-static struct card_data *conf_data_by_card(int card, bool msg)
+static struct card_cfg_data *conf_data_by_card(int card, bool msg)
 {
-	struct card_data *conf;
+	struct card_cfg_data *conf;
 
 	for (conf = conf_cards; conf; conf = conf->next) {
 		if (conf->card == card) {
@@ -229,55 +222,31 @@ static bool sysfs_match(const char *sysfs_root, snd_config_t *config)
 	return iter > 0;
 }
 
-static bool test_filename1(int card, const char *filename, const char *sysfs_card_root)
+static void assign_card_config(int card, const char *sysfs_card_root)
 {
-	struct card_data *data, *data2;
-	snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
-	snd_config_iterator_t i, next;
+	struct card_cfg_data *data;
+	snd_config_t *sysfs_card_config;
 
-	config = conf_load_from_file(filename);
-	if (snd_config_search(config, "sysfs", &sysfs_config) ||
-	    snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
-		ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
-	if (snd_config_search(config, "card", &card_config) ||
-	    snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
-		ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
-	if (!sysfs_match(SYSFS_ROOT, sysfs_config))
-		return false;
-	snd_config_for_each(i, next, card_config) {
-		node = snd_config_iterator_entry(i);
-		if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
-		    snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
-			ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+	for (data = conf_cards; data; data = data->next) {
+		snd_config_search(data->config, "sysfs", &sysfs_card_config);
 		if (!sysfs_match(sysfs_card_root, sysfs_card_config))
 			continue;
-		data = malloc(sizeof(*data));
-		if (!data)
-			ksft_exit_fail_msg("Out of memory\n");
-		data2 = conf_data_by_card(card, false);
-		if (data2)
-			ksft_exit_fail_msg("Duplicate card '%s' <-> '%s'\n", filename, data2->filename);
+
 		data->card = card;
-		data->filename = filename;
-		data->config = node;
-		data->next = conf_cards;
-		conf_cards = data;
-		return true;
+		break;
 	}
-	return false;
 }
 
-static bool test_filename(const char *filename)
+static void assign_card_configs(void)
 {
 	char fn[128];
 	int card;
 
 	for (card = 0; card < 32; card++) {
 		snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card);
-		if (access(fn, R_OK) == 0 && test_filename1(card, filename, fn))
-			return true;
+		if (access(fn, R_OK) == 0)
+			assign_card_config(card, fn);
 	}
-	return false;
 }
 
 static int filename_filter(const struct dirent *dirent)
@@ -296,6 +265,41 @@ static int filename_filter(const struct dirent *dirent)
 	return 0;
 }
 
+static bool match_config(const char *filename)
+{
+	struct card_cfg_data *data;
+	snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
+	snd_config_iterator_t i, next;
+
+	config = conf_load_from_file(filename);
+	if (snd_config_search(config, "sysfs", &sysfs_config) ||
+	    snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
+		ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
+	if (snd_config_search(config, "card", &card_config) ||
+	    snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
+		ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
+	if (!sysfs_match(SYSFS_ROOT, sysfs_config))
+		return false;
+	snd_config_for_each(i, next, card_config) {
+		node = snd_config_iterator_entry(i);
+		if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
+		    snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
+			ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+
+		data = malloc(sizeof(*data));
+		if (!data)
+			ksft_exit_fail_msg("Out of memory\n");
+		data->filename = filename;
+		data->config = node;
+		data->card = -1;
+		if (snd_config_get_id(node, &data->config_id))
+			ksft_exit_fail_msg("snd_config_get_id failed for card\n");
+		data->next = conf_cards;
+		conf_cards = data;
+	}
+	return true;
+}
+
 void conf_load(void)
 {
 	const char *fn = "conf.d";
@@ -311,17 +315,19 @@ void conf_load(void)
 		if (filename == NULL)
 			ksft_exit_fail_msg("Out of memory\n");
 		sprintf(filename, "%s/%s", fn, namelist[j]->d_name);
-		if (test_filename(filename))
+		if (match_config(filename))
 			filename = NULL;
 		free(filename);
 		free(namelist[j]);
 	}
 	free(namelist);
+
+	assign_card_configs();
 }
 
 void conf_free(void)
 {
-	struct card_data *conf;
+	struct card_cfg_data *conf;
 
 	while (conf_cards) {
 		conf = conf_cards;
@@ -332,7 +338,7 @@ void conf_free(void)
 
 snd_config_t *conf_by_card(int card)
 {
-	struct card_data *conf;
+	struct card_cfg_data *conf;
 
 	conf = conf_data_by_card(card, true);
 	if (conf)
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index c0a39818c5a4..de664dedb541 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -566,6 +566,7 @@ void *card_thread(void *data)
 int main(void)
 {
 	struct card_data *card;
+	struct card_cfg_data *conf;
 	struct pcm_data *pcm;
 	snd_config_t *global_config, *cfg;
 	int num_pcm_tests = 0, num_tests, num_std_pcm_tests;
@@ -583,6 +584,10 @@ int main(void)
 
 	find_pcms();
 
+	for (conf = conf_cards; conf; conf = conf->next)
+		if (conf->card < 0)
+			num_missing++;
+
 	num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL);
 
 	for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
@@ -598,6 +603,11 @@ int main(void)
 
 	ksft_set_plan(num_missing + num_pcm_tests);
 
+	for (conf = conf_cards; conf; conf = conf->next)
+		if (conf->card < 0)
+			ksft_test_result_fail("test.missing.%s.%s\n",
+					      conf->filename, conf->config_id);
+
 	for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) {
 		ksft_test_result(false, "test.missing.%d.%d.%d.%s\n",
 				 pcm->card, pcm->device, pcm->subdevice,
diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh
index 5f2171f0116d..4cde62f90468 100755
--- a/tools/testing/selftests/amd-pstate/gitsource.sh
+++ b/tools/testing/selftests/amd-pstate/gitsource.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Testing and monitor the cpu desire performance, frequency, load,
@@ -66,12 +66,15 @@ post_clear_gitsource()
 
 install_gitsource()
 {
-	if [ ! -d $git_name ]; then
+	if [ ! -d $SCRIPTDIR/$git_name ]; then
+		pushd $(pwd) > /dev/null 2>&1
+		cd $SCRIPTDIR
 		printf "Download gitsource, please wait a moment ...\n\n"
 		wget -O $git_tar $gitsource_url > /dev/null 2>&1
 
 		printf "Tar gitsource ...\n\n"
 		tar -xzf $git_tar
+		popd > /dev/null 2>&1
 	fi
 }
 
@@ -79,12 +82,14 @@ install_gitsource()
 run_gitsource()
 {
 	echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
-	./amd_pstate_trace.py -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+	$TRACER -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
 
 	printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n"
-	cd $git_name
-	perf stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o ../$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > ../$OUTFILE_GIT-perf-$1-$2.log 2>&1
-	cd ..
+	BACKUP_DIR=$(pwd)
+	pushd $BACKUP_DIR > /dev/null 2>&1
+	cd $SCRIPTDIR/$git_name
+	$PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o $BACKUP_DIR/$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > $BACKUP_DIR/$OUTFILE_GIT-perf-$1-$2.log 2>&1
+	popd > /dev/null 2>&1
 
 	for job in `jobs -p`
 	do
diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh
index de4d8e9c9565..b053eea8bb19 100755
--- a/tools/testing/selftests/amd-pstate/run.sh
+++ b/tools/testing/selftests/amd-pstate/run.sh
@@ -8,9 +8,12 @@ else
 	FILE_MAIN=DONE
 fi
 
-source basic.sh
-source tbench.sh
-source gitsource.sh
+SCRIPTDIR=`dirname "$0"`
+TRACER=$SCRIPTDIR/../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+
+source $SCRIPTDIR/basic.sh
+source $SCRIPTDIR/tbench.sh
+source $SCRIPTDIR/gitsource.sh
 
 # amd-pstate-ut only run on x86/x86_64 AMD systems.
 ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/')
@@ -22,6 +25,7 @@ OUTFILE=selftest
 OUTFILE_TBENCH="$OUTFILE.tbench"
 OUTFILE_GIT="$OUTFILE.gitsource"
 
+PERF=/usr/bin/perf
 SYSFS=
 CPUROOT=
 CPUFREQROOT=
@@ -151,6 +155,7 @@ help()
 	[-p <tbench process number>]
 	[-l <loop times for tbench>]
 	[-i <amd tracer interval>]
+	[-b <perf binary>]
 	[-m <comparative test: acpi-cpufreq>]
 	\n"
 	exit 2
@@ -158,7 +163,7 @@ help()
 
 parse_arguments()
 {
-	while getopts ho:c:t:p:l:i:m: arg
+	while getopts ho:c:t:p:l:i:b:m: arg
 	do
 		case $arg in
 			h) # --help
@@ -189,6 +194,10 @@ parse_arguments()
 				TRACER_INTERVAL=$OPTARG
 				;;
 
+			b) # --perf-binary
+				PERF=`realpath $OPTARG`
+				;;
+
 			m) # --comparative-test
 				COMPARATIVE_TEST=$OPTARG
 				;;
@@ -202,8 +211,8 @@ parse_arguments()
 
 command_perf()
 {
-	if ! command -v perf > /dev/null; then
-		echo $msg please install perf. >&2
+	if ! $PERF -v; then
+		echo $msg please install perf or provide perf binary path as argument >&2
 		exit $ksft_skip
 	fi
 }
diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh
index 49c9850341f6..2a98d9c9202e 100755
--- a/tools/testing/selftests/amd-pstate/tbench.sh
+++ b/tools/testing/selftests/amd-pstate/tbench.sh
@@ -64,11 +64,11 @@ post_clear_tbench()
 run_tbench()
 {
 	echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
-	./amd_pstate_trace.py -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+	$TRACER -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
 
 	printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n"
 	tbench_srv > /dev/null 2>&1 &
-	perf stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
+	$PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
 
 	pid=`pidof tbench_srv`
 	kill $pid
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index e3d262831d91..1189e77c8152 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -81,6 +81,20 @@ static void lrcpc_sigill(void)
 	asm volatile(".inst 0xb8bfc3e0" : : : );
 }
 
+static void lse128_sigill(void)
+{
+	u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 };
+	register u64 *memp asm ("x0") = mem;
+	register u64 val0 asm ("x1") = 5;
+	register u64 val1 asm ("x2") = 4;
+
+	/* SWPP X1, X2, [X0] */
+	asm volatile(".inst 0x19228001"
+		     : "+r" (memp), "+r" (val0), "+r" (val1)
+		     :
+		     : "cc", "memory");
+}
+
 static void mops_sigill(void)
 {
 	char dst[1], src[1];
@@ -226,6 +240,12 @@ static void sveaes_sigill(void)
 	asm volatile(".inst 0x4522e400" : : : "z0");
 }
 
+static void sveb16b16_sigill(void)
+{
+	/* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+	asm volatile(".inst 0xC1E41C00" : : : );
+}
+
 static void svepmull_sigill(void)
 {
 	/* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -289,6 +309,19 @@ static void uscat_sigbus(void)
 	asm volatile(".inst 0xb820003f" : : : );
 }
 
+static void lrcpc3_sigill(void)
+{
+	int data[2] = { 1, 2 };
+
+	register int *src asm ("x0") = data;
+	register int data0 asm ("w2") = 0;
+	register int data1 asm ("w3") = 0;
+
+	/* LDIAPP w2, w3, [x0] */
+	asm volatile(".inst 0x99431802"
+	              : "=r" (data0), "=r" (data1) : "r" (src) :);
+}
+
 static const struct hwcap_data {
 	const char *name;
 	unsigned long at_hwcap;
@@ -349,6 +382,13 @@ static const struct hwcap_data {
 		.sigill_fn = ilrcpc_sigill,
 	},
 	{
+		.name = "LRCPC3",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_LRCPC3,
+		.cpuinfo = "lrcpc3",
+		.sigill_fn = lrcpc3_sigill,
+	},
+	{
 		.name = "LSE",
 		.at_hwcap = AT_HWCAP,
 		.hwcap_bit = HWCAP_ATOMICS,
@@ -365,6 +405,13 @@ static const struct hwcap_data {
 		.sigbus_reliable = true,
 	},
 	{
+		.name = "LSE128",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_LSE128,
+		.cpuinfo = "lse128",
+		.sigill_fn = lse128_sigill,
+	},
+	{
 		.name = "MOPS",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_MOPS,
@@ -494,6 +541,13 @@ static const struct hwcap_data {
 		.sigill_fn = sveaes_sigill,
 	},
 	{
+		.name = "SVE2 B16B16",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVE_B16B16,
+		.cpuinfo = "sveb16b16",
+		.sigill_fn = sveb16b16_sigill,
+	},
+	{
 		.name = "SVE2 PMULL",
 		.at_hwcap = AT_HWCAP2,
 		.hwcap_bit = HWCAP2_SVEPMULL,
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 4328895dfc87..547d077e3517 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -473,6 +473,13 @@ function _start
 //	mov	x8, #__NR_sched_yield	// Encourage preemption
 //	svc	#0
 
+#ifdef SSVE
+	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=0,SM=1
+	and	x1, x0, #3
+	cmp	x1, #1
+	b.ne	svcr_barf
+#endif
+
 	mov	x21, #0
 0:	mov	x0, x21
 	bl	check_zreg
@@ -553,3 +560,15 @@ function vl_barf
 	mov	x1, #1
 	svc	#0
 endfunction
+
+function svcr_barf
+	mov	x10, x0
+
+	puts	"Bad SVCR: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4225f975fce3..9c27b67bc7b1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -585,11 +585,20 @@ endef
 # Define test_progs test runner.
 TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
-TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
-			 network_helpers.c testing_helpers.c		\
-			 btf_helpers.c flow_dissector_load.h		\
-			 cap_helpers.c test_loader.c xsk.c disasm.c	\
-			 json_writer.c unpriv_helpers.c 		\
+TRUNNER_EXTRA_SOURCES := test_progs.c		\
+			 cgroup_helpers.c	\
+			 trace_helpers.c	\
+			 network_helpers.c	\
+			 testing_helpers.c	\
+			 btf_helpers.c		\
+			 cap_helpers.c		\
+			 unpriv_helpers.c 	\
+			 netlink_helpers.c	\
+			 test_loader.c		\
+			 xsk.c			\
+			 disasm.c		\
+			 json_writer.c 		\
+			 flow_dissector_load.h	\
 			 ip_check_defrag_frags.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       $(OUTPUT)/liburandom_read.so			\
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 2c8cb3f61529..1386baf9ae4a 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -458,4 +458,23 @@ extern void bpf_throw(u64 cookie) __ksym;
 		__bpf_assert_op(LHS, <=, END, value, false);		\
 	})
 
+struct bpf_iter_css_task;
+struct cgroup_subsys_state;
+extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
+		struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
+extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
+
+struct bpf_iter_task;
+extern int bpf_iter_task_new(struct bpf_iter_task *it,
+		struct task_struct *task, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
+extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
+
+struct bpf_iter_css;
+extern int bpf_iter_css_new(struct bpf_iter_css *it,
+				struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
+extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
+extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+
 #endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index a5e246f7b202..91907b321f91 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 {
 	int b;
 };
 
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
-		  "Global functions as their definitions will be in bpf_testmod.ko BTF");
+__bpf_hook_start();
 
 noinline int
 bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
@@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a)
 }
 EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
 
-__diag_pop();
+__bpf_hook_end();
 
 static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.attr = { .name = "bpf_testmod", .mode = 0666, },
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 5b1da2a32ea7..5aa133bf3688 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -45,9 +45,12 @@
 #define format_parent_cgroup_path(buf, path) \
 	format_cgroup_path_pid(buf, path, getppid())
 
-#define format_classid_path(buf)				\
-	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,	\
-		 CGROUP_WORK_DIR)
+#define format_classid_path_pid(buf, pid)				\
+	snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH,	\
+		 CGROUP_WORK_DIR, pid)
+
+#define format_classid_path(buf)	\
+	format_classid_path_pid(buf, getpid())
 
 static __thread bool cgroup_workdir_mounted;
 
@@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path)
 }
 
 /**
- * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @relative_path: The cgroup path, relative to the workdir, to join
+ * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
+ * @cgroup_workdir: The absolute cgroup path
  *
  * On success, it returns the cgroup id. On failure it returns 0,
  * which is an invalid cgroup id.
  * If there is a failure, it prints the error to stderr.
  */
-unsigned long long get_cgroup_id(const char *relative_path)
+unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
 {
 	int dirfd, err, flags, mount_id, fhsize;
 	union {
 		unsigned long long cgid;
 		unsigned char raw_bytes[8];
 	} id;
-	char cgroup_workdir[PATH_MAX + 1];
 	struct file_handle *fhp, *fhp2;
 	unsigned long long ret = 0;
 
-	format_cgroup_path(cgroup_workdir, relative_path);
-
 	dirfd = AT_FDCWD;
 	flags = 0;
 	fhsize = sizeof(*fhp);
@@ -474,6 +474,14 @@ free_mem:
 	return ret;
 }
 
+unsigned long long get_cgroup_id(const char *relative_path)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_workdir, relative_path);
+	return get_cgroup_id_from_path(cgroup_workdir);
+}
+
 int cgroup_setup_and_join(const char *path) {
 	int cg_fd;
 
@@ -523,10 +531,20 @@ int setup_classid_environment(void)
 		return 1;
 	}
 
-	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
-	    errno != EBUSY) {
-		log_err("mount cgroup net_cls");
-		return 1;
+	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
+		if (errno != EBUSY) {
+			log_err("mount cgroup net_cls");
+			return 1;
+		}
+
+		if (rmdir(NETCLS_MOUNT_PATH)) {
+			log_err("rmdir cgroup net_cls");
+			return 1;
+		}
+		if (umount(CGROUP_MOUNT_DFLT)) {
+			log_err("umount cgroup base");
+			return 1;
+		}
 	}
 
 	cleanup_classid_environment();
@@ -541,15 +559,16 @@ int setup_classid_environment(void)
 
 /**
  * set_classid() - Set a cgroupv1 net_cls classid
- * @id: the numeric classid
  *
- * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * Writes the classid into the cgroup work dir's net_cls.classid
  * file in order to later on trigger socket tagging.
  *
+ * We leverage the current pid as the classid, ensuring unique identification.
+ *
  * On success, it returns 0, otherwise on failure it returns 1. If there
  * is a failure, it prints the error to stderr.
  */
-int set_classid(unsigned int id)
+int set_classid(void)
 {
 	char cgroup_workdir[PATH_MAX - 42];
 	char cgroup_classid_path[PATH_MAX + 1];
@@ -565,7 +584,7 @@ int set_classid(unsigned int id)
 		return 1;
 	}
 
-	if (dprintf(fd, "%u\n", id) < 0) {
+	if (dprintf(fd, "%u\n", getpid()) < 0) {
 		log_err("Setting cgroup classid");
 		rc = 1;
 	}
@@ -607,3 +626,66 @@ void cleanup_classid_environment(void)
 	join_cgroup_from_top(NETCLS_MOUNT_PATH);
 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
 }
+
+/**
+ * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
+ */
+unsigned long long get_classid_cgroup_id(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+	return get_cgroup_id_from_path(cgroup_workdir);
+}
+
+/**
+ * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
+ * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
+ * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like
+ * "net_cls,net_prio".
+ */
+int get_cgroup1_hierarchy_id(const char *subsys_name)
+{
+	char *c, *c2, *c3, *c4;
+	bool found = false;
+	char line[1024];
+	FILE *file;
+	int i, id;
+
+	if (!subsys_name)
+		return -1;
+
+	file = fopen("/proc/self/cgroup", "r");
+	if (!file) {
+		log_err("fopen /proc/self/cgroup");
+		return -1;
+	}
+
+	while (fgets(line, 1024, file)) {
+		i = 0;
+		for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
+			if (i == 0) {
+				id = strtol(c, NULL, 10);
+			} else if (i == 1) {
+				if (!strcmp(c, subsys_name)) {
+					found = true;
+					break;
+				}
+
+				/* Multiple subsystems may share one single mount point */
+				for (c3 = strtok_r(c, ",", &c4); c3;
+				     c3 = strtok_r(NULL, ",", &c4)) {
+					if (!strcmp(c, subsys_name)) {
+						found = true;
+						break;
+					}
+				}
+			}
+			i++;
+		}
+		if (found)
+			break;
+	}
+	fclose(file);
+	return found ? id : -1;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5c2cb9c8b546..ee053641c026 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -20,6 +20,7 @@ int get_root_cgroup(void);
 int create_and_get_cgroup(const char *relative_path);
 void remove_cgroup(const char *relative_path);
 unsigned long long get_cgroup_id(const char *relative_path);
+int get_cgroup1_hierarchy_id(const char *subsys_name);
 
 int join_cgroup(const char *relative_path);
 int join_root_cgroup(void);
@@ -29,8 +30,9 @@ int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
 
 /* cgroupv1 related */
-int set_classid(unsigned int id);
+int set_classid(void);
 int join_classid(void);
+unsigned long long get_classid_cgroup_id(void);
 
 int setup_classid_environment(void);
 void cleanup_classid_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 02dd4409200e..3ec5927ec3e5 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -71,6 +71,7 @@ CONFIG_NETFILTER_SYNPROXY=y
 CONFIG_NETFILTER_XT_CONNMARK=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
 CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETKIT=y
 CONFIG_NF_CONNTRACK=y
 CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_DEFRAG_IPV4=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 253821494884..29c8635c5722 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
 CONFIG_ARCH_VEXPRESS=y
 CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
 CONFIG_ARM_SMMU_V3=y
@@ -37,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DEBUG_INFO_BTF=y
 CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_REDUCED=n
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_LOCKDEP=y
 CONFIG_DEBUG_NOTIFIERS=y
@@ -46,7 +46,6 @@ CONFIG_DEBUG_SG=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_DEVTMPFS=y
-CONFIG_DRM_VIRTIO_GPU=y
 CONFIG_DRM=y
 CONFIG_DUMMY=y
 CONFIG_EXPERT=y
@@ -67,7 +66,6 @@ CONFIG_HAVE_KRETPROBES=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_HUGETLBFS=y
-CONFIG_HW_RANDOM_VIRTIO=y
 CONFIG_HW_RANDOM=y
 CONFIG_HZ_100=y
 CONFIG_IDLE_PAGE_TRACKING=y
@@ -99,8 +97,6 @@ CONFIG_MEMCG=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_NAMESPACES=y
-CONFIG_NET_9P_VIRTIO=y
-CONFIG_NET_9P=y
 CONFIG_NET_ACT_BPF=y
 CONFIG_NET_ACT_GACT=y
 CONFIG_NETDEVICES=y
@@ -140,7 +136,6 @@ CONFIG_SCHED_TRACER=y
 CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_VIRTIO=y
 CONFIG_SCSI=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
@@ -167,16 +162,6 @@ CONFIG_UPROBES=y
 CONFIG_USELIB=y
 CONFIG_USER_NS=y
 CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_FS=y
-CONFIG_VIRTIO_INPUT=y
-CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
-CONFIG_VIRTIO_MMIO=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
 CONFIG_VLAN_8021Q=y
 CONFIG_VSOCKETS=y
 CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 2ba92167be35..e93330382849 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
 CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
 CONFIG_AUDIT=y
 CONFIG_BLK_CGROUP=y
@@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_NAMESPACES=y
 CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
 CONFIG_NET_ACT_BPF=y
 CONFIG_NET_ACT_GACT=y
 CONFIG_NET_KEY=y
@@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y
 CONFIG_SAMPLES=y
 CONFIG_SCHED_TRACER=y
 CONFIG_SCSI=y
-CONFIG_SCSI_VIRTIO=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_STACK_TRACER=y
 CONFIG_STATIC_KEYS_SELFTEST=y
@@ -136,11 +132,6 @@ CONFIG_UPROBES=y
 CONFIG_USELIB=y
 CONFIG_USER_NS=y
 CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
 CONFIG_VLAN_8021Q=y
 CONFIG_VSOCKETS=y
 CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm
new file mode 100644
index 000000000000..a9746ca78777
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.vm
@@ -0,0 +1,12 @@
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 2e70a6048278..f7bfb2b09c82 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -1,6 +1,3 @@
-CONFIG_9P_FS=y
-CONFIG_9P_FS_POSIX_ACL=y
-CONFIG_9P_FS_SECURITY=y
 CONFIG_AGP=y
 CONFIG_AGP_AMD64=y
 CONFIG_AGP_INTEL=y
@@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y
 CONFIG_CPUSETS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_BLAKE2B=y
-CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_CRYPTO_SEQIV=y
 CONFIG_CRYPTO_XXHASH=y
 CONFIG_DCB=y
@@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y
 CONFIG_MINIX_SUBPARTITION=y
 CONFIG_NAMESPACES=y
 CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
 CONFIG_NET_ACT_BPF=y
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_EMATCH=y
@@ -228,12 +222,6 @@ CONFIG_USER_NS=y
 CONFIG_VALIDATE_FS_PARSER=y
 CONFIG_VETH=y
 CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
 CONFIG_VLAN_8021Q=y
 CONFIG_VSOCKETS=y
 CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 1a9eeefda9a8..2ea36408816b 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type)
 	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool is_percpu(__u32 map_type)
+{
+	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
 struct upsert_opts {
 	__u32 map_type;
 	int map_fd;
 	__u32 n;
+	bool retry_for_nomem;
 };
 
 static int create_small_hash(void)
@@ -148,19 +155,38 @@ static int create_small_hash(void)
 	return map_fd;
 }
 
+static bool retry_for_nomem_fn(int err)
+{
+	return err == ENOMEM;
+}
+
 static void *patch_map_thread(void *arg)
 {
+	/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+	static __u8 blob[8 << 10];
 	struct upsert_opts *opts = arg;
+	void *val_ptr;
 	int val;
 	int ret;
 	int i;
 
 	for (i = 0; i < opts->n; i++) {
-		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 			val = create_small_hash();
-		else
+			val_ptr = &val;
+		} else if (is_percpu(opts->map_type)) {
+			val_ptr = blob;
+		} else {
 			val = rand();
-		ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
+			val_ptr = &val;
+		}
+
+		/* 2 seconds may be enough ? */
+		if (opts->retry_for_nomem)
+			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+						   40, retry_for_nomem_fn);
+		else
+			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
 		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
 
 		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
@@ -281,6 +307,13 @@ static void __test(int map_fd)
 	else
 		opts.n /= 2;
 
+	/* per-cpu bpf memory allocator may not be able to allocate per-cpu
+	 * pointer successfully and it can not refill free llist timely, and
+	 * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
+	 * the problem temporarily.
+	 */
+	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
 	/*
 	 * Upsert keys [0, n) under some competition: with random values from
 	 * N_THREADS threads. Check values, then delete all elements and check
@@ -326,20 +359,14 @@ static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *
 
 static int create_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
 }
 
 static int create_percpu_hash(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = BPF_F_NO_PREALLOC,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
 
 	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
 }
@@ -356,21 +383,17 @@ static int create_percpu_hash_prealloc(void)
 
 static int create_lru_hash(__u32 type, __u32 map_flags)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
-		.map_flags = map_flags,
-	};
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
 
 	return map_create(type, "lru_hash", &map_opts);
 }
 
 static int create_hash_of_maps(void)
 {
-	struct bpf_map_create_opts map_opts = {
-		.sz = sizeof(map_opts),
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
 		.map_flags = BPF_F_NO_PREALLOC,
 		.inner_map_fd = create_small_hash(),
-	};
+	);
 	int ret;
 
 	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c
new file mode 100644
index 000000000000..caf36eb1d032
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Taken & modified from iproute2's libnetlink.c
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/socket.h>
+
+#include "netlink_helpers.h"
+
+static int rcvbuf = 1024 * 1024;
+
+void rtnl_close(struct rtnl_handle *rth)
+{
+	if (rth->fd >= 0) {
+		close(rth->fd);
+		rth->fd = -1;
+	}
+}
+
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
+		      int protocol)
+{
+	socklen_t addr_len;
+	int sndbuf = 32768;
+	int one = 1;
+
+	memset(rth, 0, sizeof(*rth));
+	rth->proto = protocol;
+	rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
+	if (rth->fd < 0) {
+		perror("Cannot open netlink socket");
+		return -1;
+	}
+	if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
+		       &sndbuf, sizeof(sndbuf)) < 0) {
+		perror("SO_SNDBUF");
+		goto err;
+	}
+	if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
+		       &rcvbuf, sizeof(rcvbuf)) < 0) {
+		perror("SO_RCVBUF");
+		goto err;
+	}
+
+	/* Older kernels may no support extended ACK reporting */
+	setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
+		   &one, sizeof(one));
+
+	memset(&rth->local, 0, sizeof(rth->local));
+	rth->local.nl_family = AF_NETLINK;
+	rth->local.nl_groups = subscriptions;
+
+	if (bind(rth->fd, (struct sockaddr *)&rth->local,
+		 sizeof(rth->local)) < 0) {
+		perror("Cannot bind netlink socket");
+		goto err;
+	}
+	addr_len = sizeof(rth->local);
+	if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
+			&addr_len) < 0) {
+		perror("Cannot getsockname");
+		goto err;
+	}
+	if (addr_len != sizeof(rth->local)) {
+		fprintf(stderr, "Wrong address length %d\n", addr_len);
+		goto err;
+	}
+	if (rth->local.nl_family != AF_NETLINK) {
+		fprintf(stderr, "Wrong address family %d\n",
+			rth->local.nl_family);
+		goto err;
+	}
+	rth->seq = time(NULL);
+	return 0;
+err:
+	rtnl_close(rth);
+	return -1;
+}
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+{
+	return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE);
+}
+
+static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+	int len;
+
+	do {
+		len = recvmsg(fd, msg, flags);
+	} while (len < 0 && (errno == EINTR || errno == EAGAIN));
+	if (len < 0) {
+		fprintf(stderr, "netlink receive error %s (%d)\n",
+			strerror(errno), errno);
+		return -errno;
+	}
+	if (len == 0) {
+		fprintf(stderr, "EOF on netlink\n");
+		return -ENODATA;
+	}
+	return len;
+}
+
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+	struct iovec *iov = msg->msg_iov;
+	char *buf;
+	int len;
+
+	iov->iov_base = NULL;
+	iov->iov_len = 0;
+
+	len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
+	if (len < 0)
+		return len;
+	if (len < 32768)
+		len = 32768;
+	buf = malloc(len);
+	if (!buf) {
+		fprintf(stderr, "malloc error: not enough buffer\n");
+		return -ENOMEM;
+	}
+	iov->iov_base = buf;
+	iov->iov_len = len;
+	len = __rtnl_recvmsg(fd, msg, 0);
+	if (len < 0) {
+		free(buf);
+		return len;
+	}
+	if (answer)
+		*answer = buf;
+	else
+		free(buf);
+	return len;
+}
+
+static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
+			    nl_ext_ack_fn_t errfn)
+{
+	fprintf(stderr, "RTNETLINK answers: %s\n",
+		strerror(-err->error));
+}
+
+static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
+			   size_t iovlen, struct nlmsghdr **answer,
+			   bool show_rtnl_err, nl_ext_ack_fn_t errfn)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct iovec riov;
+	struct msghdr msg = {
+		.msg_name	= &nladdr,
+		.msg_namelen	= sizeof(nladdr),
+		.msg_iov	= iov,
+		.msg_iovlen	= iovlen,
+	};
+	unsigned int seq = 0;
+	struct nlmsghdr *h;
+	int i, status;
+	char *buf;
+
+	for (i = 0; i < iovlen; i++) {
+		h = iov[i].iov_base;
+		h->nlmsg_seq = seq = ++rtnl->seq;
+		if (answer == NULL)
+			h->nlmsg_flags |= NLM_F_ACK;
+	}
+	status = sendmsg(rtnl->fd, &msg, 0);
+	if (status < 0) {
+		perror("Cannot talk to rtnetlink");
+		return -1;
+	}
+	/* change msg to use the response iov */
+	msg.msg_iov = &riov;
+	msg.msg_iovlen = 1;
+	i = 0;
+	while (1) {
+next:
+		status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+		++i;
+		if (status < 0)
+			return status;
+		if (msg.msg_namelen != sizeof(nladdr)) {
+			fprintf(stderr,
+				"Sender address length == %d!\n",
+				msg.msg_namelen);
+			exit(1);
+		}
+		for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
+			int len = h->nlmsg_len;
+			int l = len - sizeof(*h);
+
+			if (l < 0 || len > status) {
+				if (msg.msg_flags & MSG_TRUNC) {
+					fprintf(stderr, "Truncated message!\n");
+					free(buf);
+					return -1;
+				}
+				fprintf(stderr,
+					"Malformed message: len=%d!\n",
+					len);
+				exit(1);
+			}
+			if (nladdr.nl_pid != 0 ||
+			    h->nlmsg_pid != rtnl->local.nl_pid ||
+			    h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) {
+				/* Don't forget to skip that message. */
+				status -= NLMSG_ALIGN(len);
+				h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+				continue;
+			}
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+				int error = err->error;
+
+				if (l < sizeof(struct nlmsgerr)) {
+					fprintf(stderr, "ERROR truncated\n");
+					free(buf);
+					return -1;
+				}
+				if (error) {
+					errno = -error;
+					if (rtnl->proto != NETLINK_SOCK_DIAG &&
+					    show_rtnl_err)
+						rtnl_talk_error(h, err, errfn);
+				}
+				if (i < iovlen) {
+					free(buf);
+					goto next;
+				}
+				if (error) {
+					free(buf);
+					return -i;
+				}
+				if (answer)
+					*answer = (struct nlmsghdr *)buf;
+				else
+					free(buf);
+				return 0;
+			}
+			if (answer) {
+				*answer = (struct nlmsghdr *)buf;
+				return 0;
+			}
+			fprintf(stderr, "Unexpected reply!\n");
+			status -= NLMSG_ALIGN(len);
+			h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+		}
+		free(buf);
+		if (msg.msg_flags & MSG_TRUNC) {
+			fprintf(stderr, "Message truncated!\n");
+			continue;
+		}
+		if (status) {
+			fprintf(stderr, "Remnant of size %d!\n", status);
+			exit(1);
+		}
+	}
+}
+
+static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+		       struct nlmsghdr **answer, bool show_rtnl_err,
+		       nl_ext_ack_fn_t errfn)
+{
+	struct iovec iov = {
+		.iov_base	= n,
+		.iov_len	= n->nlmsg_len,
+	};
+
+	return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn);
+}
+
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+	      struct nlmsghdr **answer)
+{
+	return __rtnl_talk(rtnl, n, answer, true, NULL);
+}
+
+int addattr(struct nlmsghdr *n, int maxlen, int type)
+{
+	return addattr_l(n, maxlen, type, NULL, 0);
+}
+
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data)
+{
+	return addattr_l(n, maxlen, type, &data, sizeof(__u8));
+}
+
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data)
+{
+	return addattr_l(n, maxlen, type, &data, sizeof(__u16));
+}
+
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+	return addattr_l(n, maxlen, type, &data, sizeof(__u32));
+}
+
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data)
+{
+	return addattr_l(n, maxlen, type, &data, sizeof(__u64));
+}
+
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str)
+{
+	return addattr_l(n, maxlen, type, str, strlen(str)+1);
+}
+
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
+	      int alen)
+{
+	int len = RTA_LENGTH(alen);
+	struct rtattr *rta;
+
+	if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
+		fprintf(stderr, "%s: Message exceeded bound of %d\n",
+			__func__, maxlen);
+		return -1;
+	}
+	rta = NLMSG_TAIL(n);
+	rta->rta_type = type;
+	rta->rta_len = len;
+	if (alen)
+		memcpy(RTA_DATA(rta), data, alen);
+	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+	return 0;
+}
+
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len)
+{
+	if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
+		fprintf(stderr, "%s: Message exceeded bound of %d\n",
+			__func__, maxlen);
+		return -1;
+	}
+
+	memcpy(NLMSG_TAIL(n), data, len);
+	memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
+	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+	return 0;
+}
+
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
+{
+	struct rtattr *nest = NLMSG_TAIL(n);
+
+	addattr_l(n, maxlen, type, NULL, 0);
+	return nest;
+}
+
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+	nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest;
+	return n->nlmsg_len;
+}
diff --git a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h
new file mode 100644
index 000000000000..68116818a47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef NETLINK_HELPERS_H
+#define NETLINK_HELPERS_H
+
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+struct rtnl_handle {
+	int			fd;
+	struct sockaddr_nl	local;
+	struct sockaddr_nl	peer;
+	__u32			seq;
+	__u32			dump;
+	int			proto;
+	FILE			*dump_fp;
+#define RTNL_HANDLE_F_LISTEN_ALL_NSID		0x01
+#define RTNL_HANDLE_F_SUPPRESS_NLERR		0x02
+#define RTNL_HANDLE_F_STRICT_CHK		0x04
+	int			flags;
+};
+
+#define NLMSG_TAIL(nmsg) \
+	((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
+			       const struct nlmsghdr *inner_nlh);
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+	      __attribute__((warn_unused_result));
+void rtnl_close(struct rtnl_handle *rth);
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+	      struct nlmsghdr **answer)
+	      __attribute__((warn_unused_result));
+
+int addattr(struct nlmsghdr *n, int maxlen, int type);
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
+#endif /* NETLINK_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 465c1c3a3d3c..4ebd0da898f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R1", "ctx()"},
 			{0, "R10", "fp0"},
 			{0, "R3_w", "2"},
 			{1, "R3_w", "4"},
@@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R1", "ctx()"},
 			{0, "R10", "fp0"},
 			{0, "R3_w", "1"},
 			{1, "R3_w", "2"},
@@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R1", "ctx()"},
 			{0, "R10", "fp0"},
 			{0, "R3_w", "4"},
 			{1, "R3_w", "8"},
@@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{0, "R1", "ctx(off=0,imm=0)"},
+			{0, "R1", "ctx()"},
 			{0, "R10", "fp0"},
 			{0, "R3_w", "7"},
 			{1, "R3_w", "7"},
@@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{6, "R0_w", "pkt(off=8,r=8,imm=0)"},
+			{6, "R0_w", "pkt(off=8,r=8)"},
 			{6, "R3_w", "var_off=(0x0; 0xff)"},
 			{7, "R3_w", "var_off=(0x0; 0x1fe)"},
 			{8, "R3_w", "var_off=(0x0; 0x3fc)"},
 			{9, "R3_w", "var_off=(0x0; 0x7f8)"},
 			{10, "R3_w", "var_off=(0x0; 0xff0)"},
-			{12, "R3_w", "pkt_end(off=0,imm=0)"},
+			{12, "R3_w", "pkt_end()"},
 			{17, "R4_w", "var_off=(0x0; 0xff)"},
 			{18, "R4_w", "var_off=(0x0; 0x1fe0)"},
 			{19, "R4_w", "var_off=(0x0; 0xff0)"},
@@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			{2, "R5_w", "pkt(off=0,r=0,imm=0)"},
-			{4, "R5_w", "pkt(off=14,r=0,imm=0)"},
-			{5, "R4_w", "pkt(off=14,r=0,imm=0)"},
-			{9, "R2", "pkt(off=0,r=18,imm=0)"},
-			{10, "R5", "pkt(off=14,r=18,imm=0)"},
+			{2, "R5_w", "pkt(r=0)"},
+			{4, "R5_w", "pkt(off=14,r=0)"},
+			{5, "R4_w", "pkt(off=14,r=0)"},
+			{9, "R2", "pkt(r=18)"},
+			{10, "R5", "pkt(off=14,r=18)"},
 			{10, "R4_w", "var_off=(0x0; 0xff)"},
 			{13, "R4_w", "var_off=(0x0; 0xffff)"},
 			{14, "R4_w", "var_off=(0x0; 0xffff)"},
@@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{6, "R2_w", "pkt(r=8)"},
 			{7, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Offset is added to packet pointer R5, resulting in
 			 * known fixed offset, and variable offset from R6.
@@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = {
 			/* Constant offset is added to R5 packet pointer,
 			 * resulting in reg->off value of 14.
 			 */
-			{26, "R5_w", "pkt(off=14,r=8,"},
+			{26, "R5_w", "pkt(off=14,r=8)"},
 			/* Variable offset is added to R5, resulting in a
 			 * variable offset of (4n). See comment for insn #18
 			 * for R4 = R5 trick.
@@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{6, "R2_w", "pkt(r=8)"},
 			{7, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{8, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
 		.matches = {
-			{3, "R5_w", "pkt_end(off=0,imm=0)"},
+			{3, "R5_w", "pkt_end()"},
 			/* (ptr - ptr) << 2 == unknown, (4n) */
 			{5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
@@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{6, "R2_w", "pkt(r=8)"},
 			{8, "R6_w", "var_off=(0x0; 0x3fc)"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{9, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			{6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+			{6, "R2_w", "pkt(r=8)"},
 			{9, "R6_w", "var_off=(0x0; 0x3c)"},
 			/* Adding 14 makes R6 be (4n+2) */
 			{10, "R6_w", "var_off=(0x2; 0x7c)"},
@@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test)
 			/* Check the next line as well in case the previous line
 			 * did not have a corresponding bpf insn. Example:
 			 * func#0 @0
-			 * 0: R1=ctx(off=0,imm=0) R10=fp0
+			 * 0: R1=ctx() R10=fp0
 			 * 0: (b7) r3 = 2                 ; R3_w=2
 			 *
 			 * Sometimes it's actually two lines below, e.g. when
 			 * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
-			 *   from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
-			 *   6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
-			 *   6: (71) r3 = *(u8 *)(r2 +0)           ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
+			 *   from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+			 *   6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+			 *   6: (71) r3 = *(u8 *)(r2 +0)           ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
 			 */
 			while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
 				cur_line = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index a1766a298bb7..f7cd129cb82b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -9,8 +9,6 @@
 #include "cap_helpers.h"
 #include "bind_perm.skel.h"
 
-static int duration;
-
 static int create_netns(void)
 {
 	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
@@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno)
 	int fd = -1;
 
 	fd = socket(family, SOCK_STREAM, 0);
-	if (CHECK(fd < 0, "fd", "errno %d", errno))
+	if (!ASSERT_GE(fd, 0, "socket"))
 		goto close_socket;
 
 	if (family == AF_INET) {
@@ -60,7 +58,7 @@ void test_bind_perm(void)
 		return;
 
 	cgroup_fd = test__join_cgroup("/bind_perm");
-	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
 		return;
 
 	skel = bind_perm__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 41aba139b20b..618af9dfae9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -7,7 +7,7 @@
 #include "bpf_iter_ipv6_route.skel.h"
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
-#include "bpf_iter_task.skel.h"
+#include "bpf_iter_tasks.skel.h"
 #include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
 #include "bpf_iter_task_vmas.skel.h"
@@ -34,8 +34,6 @@
 #include "bpf_iter_ksym.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
-static int duration;
-
 static void test_btf_id_or_null(void)
 {
 	struct bpf_iter_test_kern3 *skel;
@@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_
 	/* not check contents, but ensure read() ends without error */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+	ASSERT_GE(len, 0, "read");
 
 	close(iter_fd);
 
@@ -215,12 +213,12 @@ static void *do_nothing_wait(void *arg)
 static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
 				     int *num_unknown, int *num_known)
 {
-	struct bpf_iter_task *skel;
+	struct bpf_iter_tasks *skel;
 	pthread_t thread_id;
 	void *ret;
 
-	skel = bpf_iter_task__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+	skel = bpf_iter_tasks__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
 		return;
 
 	ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
@@ -239,7 +237,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
 	ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
 		     "pthread_join");
 
-	bpf_iter_task__destroy(skel);
+	bpf_iter_tasks__destroy(skel);
 }
 
 static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
@@ -307,10 +305,10 @@ static void test_task_pidfd(void)
 
 static void test_task_sleepable(void)
 {
-	struct bpf_iter_task *skel;
+	struct bpf_iter_tasks *skel;
 
-	skel = bpf_iter_task__open_and_load();
-	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+	skel = bpf_iter_tasks__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_task_sleepable);
@@ -320,7 +318,7 @@ static void test_task_sleepable(void)
 	ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
 		  "num_success_copy_from_user_task");
 
-	bpf_iter_task__destroy(skel);
+	bpf_iter_tasks__destroy(skel);
 }
 
 static void test_task_stack(void)
@@ -334,6 +332,8 @@ static void test_task_stack(void)
 	do_dummy_read(skel->progs.dump_task_stack);
 	do_dummy_read(skel->progs.get_task_user_stacks);
 
+	ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks");
+
 	bpf_iter_task_stack__destroy(skel);
 }
 
@@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 		goto free_link;
 	}
 
-	if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(err, 0, "read"))
 		goto free_link;
 
 	ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
@@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected,
 	start = 0;
 	while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
 		start += len;
-		if (CHECK(start >= 16, "read", "read len %d\n", len))
+		if (!ASSERT_LT(start, 16, "read"))
 			return -1;
 		read_buf_len = read_one_char ? 1 : 16 - start;
 	}
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		return -1;
 
 	if (!ASSERT_STREQ(buf, expected, "read"))
@@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected)
 	int err, iter_fd;
 
 	iter_fd = open(path, O_RDONLY);
-	if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
-		  path, strerror(errno)))
+	if (!ASSERT_GE(iter_fd, 0, "open"))
 		return -1;
 
 	err = do_read_with_fd(iter_fd, expected, false);
@@ -600,7 +599,7 @@ static void test_file_iter(void)
 	unlink(path);
 
 	err = bpf_link__pin(link, path);
-	if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+	if (!ASSERT_OK(err, "pin_iter"))
 		goto free_link;
 
 	err = do_read(path, "abcd");
@@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	 * overflow and needs restart.
 	 */
 	map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
-	if (CHECK(map1_fd < 0, "bpf_map_create",
-		  "map_creation failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(map1_fd, 0, "bpf_map_create"))
 		goto out;
 	map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
-	if (CHECK(map2_fd < 0, "bpf_map_create",
-		  "map_creation failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(map2_fd, 0, "bpf_map_create"))
 		goto free_map1;
 
 	/* bpf_seq_printf kernel buffer is 8 pages, so one map
@@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	/* setup filtering map_id in bpf program */
 	map_info_len = sizeof(map_info);
 	err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
-	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
-		  strerror(errno)))
+	if (!ASSERT_OK(err, "get_map_info"))
 		goto free_map2;
 	skel->bss->map1_id = map_info.id;
 
 	err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
-	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
-		  strerror(errno)))
+	if (!ASSERT_OK(err, "get_map_info"))
 		goto free_map2;
 	skel->bss->map2_id = map_info.id;
 
@@ -705,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 		goto free_link;
 
 	buf = malloc(expected_read_len);
-	if (!buf)
+	if (!ASSERT_OK_PTR(buf, "malloc"))
 		goto close_iter;
 
 	/* do read */
@@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
 			total_read_len += len;
 
-		CHECK(len != -1 || errno != E2BIG, "read",
-		      "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
-			  len, strerror(errno));
+		ASSERT_EQ(len, -1, "read");
+		ASSERT_EQ(errno, E2BIG, "read");
 		goto free_buf;
 	} else if (!ret1) {
 		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
 			total_read_len += len;
 
-		if (CHECK(len < 0, "read", "read failed: %s\n",
-			  strerror(errno)))
+		if (!ASSERT_GE(len, 0, "read"))
 			goto free_buf;
 	} else {
 		do {
@@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 				total_read_len += len;
 		} while (len > 0 || len == -EAGAIN);
 
-		if (CHECK(len < 0, "read", "read failed: %s\n",
-			  strerror(errno)))
+		if (!ASSERT_GE(len, 0, "read"))
 			goto free_buf;
 	}
 
@@ -836,7 +828,7 @@ static void test_bpf_hash_map(void)
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
@@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void)
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
+	if (!ASSERT_OK_PTR(val, "malloc"))
+		goto out;
 
 	err = bpf_iter_bpf_percpu_hash_map__load(skel);
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
@@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void)
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
@@ -983,17 +977,14 @@ static void test_bpf_array_map(void)
 	start = 0;
 	while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
 		start += len;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
 	res_first_key = *(__u32 *)buf;
 	res_first_val = *(__u64 *)(buf + sizeof(__u32));
-	if (CHECK(res_first_key != 0 || res_first_val != first_val,
-		  "bpf_seq_write",
-		  "seq_write failure: first key %u vs expected 0, "
-		  " first value %llu vs expected %llu\n",
-		  res_first_key, res_first_val, first_val))
+	if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") ||
+			!ASSERT_EQ(res_first_val, first_val, "bpf_seq_write"))
 		goto close_iter;
 
 	if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
@@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void)
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
+	if (!ASSERT_OK_PTR(val, "malloc"))
+		goto out;
 
 	err = bpf_iter_bpf_percpu_array_map__load(skel);
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
@@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void)
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
@@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void)
 	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
 	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
+
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
 	if (!ASSERT_OK(err, "map_update"))
 		goto out;
@@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void)
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
-	if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
-		  "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
-		goto close_iter;
+
+	 /* Note: The following assertions serve to ensure
+	  * the value was deleted. It does so by asserting
+	  * that bpf_map_lookup_elem has failed. This might
+	  * seem counterintuitive at first.
+	  */
+	ASSERT_ERR(err, "bpf_map_lookup_elem");
+	ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem");
 
 close_iter:
 	close(iter_fd);
@@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void)
 	do_dummy_read(skel->progs.fill_socket_owner);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
-	if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
-	    "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
-	    getpid(), val, err))
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem") ||
+			!ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem"))
 		goto close_socket;
 
 	do_dummy_read(skel->progs.negate_socket_local_storage);
 
 	err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
-	CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
-	      "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
-	      -getpid(), val, err);
+	ASSERT_OK(err, "bpf_map_lookup_elem");
+	ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem");
 
 close_socket:
 	close(sock_fd);
@@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void)
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 675b90b15280..f09d6ac2ef09 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void)
 	 */
 	__u32 map_ids[nr_iters + 1];
 	char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
-	__u32 i, next_id, info_len, nr_id_found, duration = 0;
+	__u32 i, next_id, info_len, nr_id_found;
 	struct timespec real_time_ts, boot_time_ts;
 	int err = 0;
 	__u64 array_value;
@@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void)
 	time_t now, load_time;
 
 	err = bpf_prog_get_fd_by_id(0);
-	CHECK(err >= 0 || errno != ENOENT,
-	      "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+	ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
+	ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");
 
 	err = bpf_map_get_fd_by_id(0);
-	CHECK(err >= 0 || errno != ENOENT,
-	      "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+	ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+	ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
 
 	err = bpf_link_get_fd_by_id(0);
-	CHECK(err >= 0 || errno != ENOENT,
-	      "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
+	ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+	ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
 
 	/* Check bpf_map_get_info_by_fd() */
 	bzero(zeros, sizeof(zeros));
@@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void)
 		/* test_obj_id.o is a dumb prog. It should never fail
 		 * to load.
 		 */
-		if (CHECK_FAIL(err))
+		if (!ASSERT_OK(err, "bpf_prog_test_load"))
 			continue;
 
 		/* Insert a magic value to the map */
 		map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
-		if (CHECK_FAIL(map_fds[i] < 0))
+		if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map"))
 			goto done;
+
 		err = bpf_map_update_elem(map_fds[i], &array_key,
 					  &array_magic_value, 0);
-		if (CHECK_FAIL(err))
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
 			goto done;
 
-		prog = bpf_object__find_program_by_name(objs[i],
-							"test_obj_id");
-		if (CHECK_FAIL(!prog))
+		prog = bpf_object__find_program_by_name(objs[i], "test_obj_id");
+		if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
 			goto done;
+
 		links[i] = bpf_program__attach(prog);
 		err = libbpf_get_error(links[i]);
-		if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+		if (!ASSERT_OK(err, "bpf_program__attach")) {
 			links[i] = NULL;
 			goto done;
 		}
@@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void)
 		bzero(&map_infos[i], info_len);
 		err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
 					     &info_len);
-		if (CHECK(err ||
-			  map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
-			  map_infos[i].key_size != sizeof(__u32) ||
-			  map_infos[i].value_size != sizeof(__u64) ||
-			  map_infos[i].max_entries != 1 ||
-			  map_infos[i].map_flags != 0 ||
-			  info_len != sizeof(struct bpf_map_info) ||
-			  strcmp((char *)map_infos[i].name, expected_map_name),
-			  "get-map-info(fd)",
-			  "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
-			  err, errno,
-			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
-			  info_len, sizeof(struct bpf_map_info),
-			  map_infos[i].key_size,
-			  map_infos[i].value_size,
-			  map_infos[i].max_entries,
-			  map_infos[i].map_flags,
-			  map_infos[i].name, expected_map_name))
+		if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") ||
+				!ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") ||
+				!ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") ||
+				!ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") ||
+				!ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") ||
+				!ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") ||
+				!ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") ||
+				!ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name"))
 			goto done;
 
 		/* Check getting prog info */
@@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void)
 		prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
 		prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
 		prog_infos[i].nr_map_ids = 2;
+
 		err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
-		if (CHECK_FAIL(err))
+		if (!ASSERT_OK(err, "clock_gettime"))
 			goto done;
+
 		err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
-		if (CHECK_FAIL(err))
+		if (!ASSERT_OK(err, "clock_gettime"))
 			goto done;
+
 		err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
 					      &info_len);
 		load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
 			+ (prog_infos[i].load_time / nsec_per_sec);
-		if (CHECK(err ||
-			  prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
-			  info_len != sizeof(struct bpf_prog_info) ||
-			  (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
-			  (env.jit_enabled &&
-			   !memcmp(jited_insns, zeros, sizeof(zeros))) ||
-			  !prog_infos[i].xlated_prog_len ||
-			  !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
-			  load_time < now - 60 || load_time > now + 60 ||
-			  prog_infos[i].created_by_uid != my_uid ||
-			  prog_infos[i].nr_map_ids != 1 ||
-			  *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
-			  strcmp((char *)prog_infos[i].name, expected_prog_name),
-			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
-			  "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
-			  "jited_prog %d xlated_prog %d load_time %lu(%lu) "
-			  "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
-			  "name %s(%s)\n",
-			  err, errno, i,
-			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
-			  info_len, sizeof(struct bpf_prog_info),
-			  env.jit_enabled,
-			  prog_infos[i].jited_prog_len,
-			  prog_infos[i].xlated_prog_len,
-			  !!memcmp(jited_insns, zeros, sizeof(zeros)),
-			  !!memcmp(xlated_insns, zeros, sizeof(zeros)),
-			  load_time, now,
-			  prog_infos[i].created_by_uid, my_uid,
-			  prog_infos[i].nr_map_ids, 1,
-			  *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
-			  prog_infos[i].name, expected_prog_name))
+
+		if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+				!ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") ||
+				!ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") ||
+				!ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") ||
+				!ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))),
+					"jited_insns") ||
+				!ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") ||
+				!ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") ||
+				!ASSERT_GE(load_time, (now - 60), "load_time") ||
+				!ASSERT_LE(load_time, (now + 60), "load_time") ||
+				!ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") ||
+				!ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") ||
+				!ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") ||
+				!ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name"))
 			goto done;
 
 		/* Check getting link info */
@@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void)
 		link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
 		err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
 					      &link_infos[i], &info_len);
-		if (CHECK(err ||
-			  link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
-			  link_infos[i].prog_id != prog_infos[i].id ||
-			  link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
-			  strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
-				 "sys_enter") ||
-			  info_len != sizeof(struct bpf_link_info),
-			  "get-link-info(fd)",
-			  "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
-			  "prog_id %d (%d) tp_name %s(%s)\n",
-			  err, errno,
-			  info_len, sizeof(struct bpf_link_info),
-			  link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
-			  link_infos[i].id,
-			  link_infos[i].prog_id, prog_infos[i].id,
-			  (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
-			  "sys_enter"))
+		if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") ||
+				!ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") ||
+				!ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") ||
+				!ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") ||
+				!ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name"))
 			goto done;
-
 	}
 
 	/* Check bpf_prog_get_next_id() */
@@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void)
 	while (!bpf_prog_get_next_id(next_id, &next_id)) {
 		struct bpf_prog_info prog_info = {};
 		__u32 saved_map_id;
-		int prog_fd;
+		int prog_fd, cmp_res;
 
 		info_len = sizeof(prog_info);
 
@@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void)
 		if (prog_fd < 0 && errno == ENOENT)
 			/* The bpf_prog is in the dead row */
 			continue;
-		if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
-			  "prog_fd %d next_id %d errno %d\n",
-			  prog_fd, next_id, errno))
+		if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id"))
 			break;
 
 		for (i = 0; i < nr_iters; i++)
@@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void)
 		 */
 		prog_info.nr_map_ids = 1;
 		err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
-		if (CHECK(!err || errno != EFAULT,
-			  "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
-			  err, errno, EFAULT))
+		if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") ||
+				!ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd"))
 			break;
 		bzero(&prog_info, sizeof(prog_info));
 		info_len = sizeof(prog_info);
@@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void)
 		err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
 		prog_infos[i].jited_prog_insns = 0;
 		prog_infos[i].xlated_prog_insns = 0;
-		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
-		      memcmp(&prog_info, &prog_infos[i], info_len) ||
-		      *(int *)(long)prog_info.map_ids != saved_map_id,
-		      "get-prog-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
-		      err, errno, info_len, sizeof(struct bpf_prog_info),
-		      memcmp(&prog_info, &prog_infos[i], info_len),
-		      *(int *)(long)prog_info.map_ids, saved_map_id);
+		cmp_res = memcmp(&prog_info, &prog_infos[i], info_len);
+
+		ASSERT_OK(err, "bpf_prog_get_info_by_fd");
+		ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len");
+		ASSERT_OK(cmp_res, "memcmp");
+		ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id");
 		close(prog_fd);
 	}
-	CHECK(nr_id_found != nr_iters,
-	      "check total prog id found by get_next_id",
-	      "nr_id_found %u(%u)\n",
-	      nr_id_found, nr_iters);
+	ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found");
 
 	/* Check bpf_map_get_next_id() */
 	nr_id_found = 0;
 	next_id = 0;
 	while (!bpf_map_get_next_id(next_id, &next_id)) {
 		struct bpf_map_info map_info = {};
-		int map_fd;
+		int map_fd, cmp_res;
 
 		info_len = sizeof(map_info);
 
@@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void)
 		if (map_fd < 0 && errno == ENOENT)
 			/* The bpf_map is in the dead row */
 			continue;
-		if (CHECK(map_fd < 0, "get-map-fd(next_id)",
-			  "map_fd %d next_id %u errno %d\n",
-			  map_fd, next_id, errno))
+		if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
 			break;
 
 		for (i = 0; i < nr_iters; i++)
@@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void)
 		nr_id_found++;
 
 		err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
-		if (CHECK_FAIL(err))
+		if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
 			goto done;
 
 		err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
-		CHECK(err || info_len != sizeof(struct bpf_map_info) ||
-		      memcmp(&map_info, &map_infos[i], info_len) ||
-		      array_value != array_magic_value,
-		      "check get-map-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
-		      err, errno, info_len, sizeof(struct bpf_map_info),
-		      memcmp(&map_info, &map_infos[i], info_len),
-		      array_value, array_magic_value);
+		cmp_res = memcmp(&map_info, &map_infos[i], info_len);
+		ASSERT_OK(err, "bpf_map_get_info_by_fd");
+		ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len");
+		ASSERT_OK(cmp_res, "memcmp");
+		ASSERT_EQ(array_value, array_magic_value, "array_value");
 
 		close(map_fd);
 	}
-	CHECK(nr_id_found != nr_iters,
-	      "check total map id found by get_next_id",
-	      "nr_id_found %u(%u)\n",
-	      nr_id_found, nr_iters);
+	ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found");
 
 	/* Check bpf_link_get_next_id() */
 	nr_id_found = 0;
@@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void)
 		if (link_fd < 0 && errno == ENOENT)
 			/* The bpf_link is in the dead row */
 			continue;
-		if (CHECK(link_fd < 0, "get-link-fd(next_id)",
-			  "link_fd %d next_id %u errno %d\n",
-			  link_fd, next_id, errno))
+		if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id"))
 			break;
 
 		for (i = 0; i < nr_iters; i++)
@@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void)
 		err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
 		cmp_res = memcmp(&link_info, &link_infos[i],
 				offsetof(struct bpf_link_info, raw_tracepoint));
-		CHECK(err || info_len != sizeof(link_info) || cmp_res,
-		      "check get-link-info(next_id->fd)",
-		      "err %d errno %d info_len %u(%zu) memcmp %d\n",
-		      err, errno, info_len, sizeof(struct bpf_link_info),
-		      cmp_res);
+		ASSERT_OK(err, "bpf_link_get_info_by_fd");
+		ASSERT_EQ(info_len, sizeof(link_info), "info_len");
+		ASSERT_OK(cmp_res, "memcmp");
 
 		close(link_fd);
 	}
-	CHECK(nr_id_found != nr_iters,
-	      "check total link id found by get_next_id",
-	      "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+	ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found");
 
 done:
 	for (i = 0; i < nr_iters; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 4aabeaa525d4..a88e6e07e4f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -20,15 +20,14 @@
 
 static const unsigned int total_bytes = 10 * 1024 * 1024;
 static int expected_stg = 0xeB9F;
-static int stop, duration;
+static int stop;
 
 static int settcpca(int fd, const char *tcp_ca)
 {
 	int err;
 
 	err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
-	if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
-		  errno))
+	if (!ASSERT_NEQ(err, -1, "setsockopt"))
 		return -1;
 
 	return 0;
@@ -65,8 +64,7 @@ static void *server(void *arg)
 		bytes += nr_sent;
 	}
 
-	CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
-	      bytes, total_bytes, nr_sent, errno);
+	ASSERT_EQ(bytes, total_bytes, "send");
 
 done:
 	if (fd >= 0)
@@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 	WRITE_ONCE(stop, 0);
 
 	lfd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+	if (!ASSERT_NEQ(lfd, -1, "socket"))
 		return;
+
 	fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+	if (!ASSERT_NEQ(fd, -1, "socket")) {
 		close(lfd);
 		return;
 	}
@@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 	sa6.sin6_family = AF_INET6;
 	sa6.sin6_addr = in6addr_loopback;
 	err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
-	if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+	if (!ASSERT_NEQ(err, -1, "bind"))
 		goto done;
+
 	err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
-	if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+	if (!ASSERT_NEQ(err, -1, "getsockname"))
 		goto done;
+
 	err = listen(lfd, 1);
-	if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+	if (!ASSERT_NEQ(err, -1, "listen"))
 		goto done;
 
 	if (sk_stg_map) {
 		err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
 					  &expected_stg, BPF_NOEXIST);
-		if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
-			  "err:%d errno:%d\n", err, errno))
+		if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
 			goto done;
 	}
 
 	/* connect to server */
 	err = connect(fd, (struct sockaddr *)&sa6, addrlen);
-	if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+	if (!ASSERT_NEQ(err, -1, "connect"))
 		goto done;
 
 	if (sk_stg_map) {
@@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 
 		err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
 					  &tmp_stg);
-		if (CHECK(!err || errno != ENOENT,
-			  "bpf_map_lookup_elem(sk_stg_map)",
-			  "err:%d errno:%d\n", err, errno))
+		if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+				!ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
 			goto done;
 	}
 
 	err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
-	if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+	if (!ASSERT_OK(err, "pthread_create"))
 		goto done;
 
 	/* recv total_bytes */
@@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 		bytes += nr_recv;
 	}
 
-	CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
-	      bytes, total_bytes, nr_recv, errno);
+	ASSERT_EQ(bytes, total_bytes, "recv");
 
 	WRITE_ONCE(stop, 1);
 	pthread_join(srv_thread, &thread_ret);
-	CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
-	      PTR_ERR(thread_ret));
+	ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+
 done:
 	close(lfd);
 	close(fd);
@@ -174,7 +172,7 @@ static void test_cubic(void)
 	struct bpf_link *link;
 
 	cubic_skel = bpf_cubic__open_and_load();
-	if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+	if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load"))
 		return;
 
 	link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
@@ -197,7 +195,7 @@ static void test_dctcp(void)
 	struct bpf_link *link;
 
 	dctcp_skel = bpf_dctcp__open_and_load();
-	if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+	if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
 		return;
 
 	link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
@@ -207,9 +205,7 @@ static void test_dctcp(void)
 	}
 
 	do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
-	CHECK(dctcp_skel->bss->stg_result != expected_stg,
-	      "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
-	      dctcp_skel->bss->stg_result, expected_stg);
+	ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
 
 	bpf_link__destroy(link);
 	bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 731c343897d8..e770912fc1d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
 	}
 
 	bpf_program__set_type(prog, type);
-	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
 	bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
 
 	err = bpf_object__load(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 92d51f377fe5..8fb4a04fbbc0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind)
 #endif
 
 	assert(0);
+	return 0;
 }
 
 static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
new file mode 100644
index 000000000000..74d6d7546f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup1_hierarchy.skel.h"
+
+static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel)
+{
+	struct bpf_link *lsm_link, *fentry_link;
+	int err;
+
+	/* Attach LSM prog first */
+	lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+	if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+		return;
+
+	/* LSM prog will be triggered when attaching fentry */
+	fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+	ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+	err = bpf_link__destroy(lsm_link);
+	ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel)
+{
+	struct bpf_link *lsm_link, *fentry_link;
+	int err;
+
+	/* Attach LSM prog first */
+	lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run);
+	if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+		return;
+
+	/* LSM prog will be triggered when attaching fentry */
+	fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+	ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+	err = bpf_link__destroy(lsm_link);
+	ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel)
+{
+	struct bpf_link *lsm_link, *fentry_link;
+	int err;
+
+	/* Attach LSM prog first */
+	lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+	if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+		return;
+
+	/* LSM prog will be triggered when attaching fentry */
+	fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+	if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success"))
+		goto cleanup;
+
+	err = bpf_link__destroy(fentry_link);
+	ASSERT_OK(err, "destroy_lsm");
+
+cleanup:
+	err = bpf_link__destroy(lsm_link);
+	ASSERT_OK(err, "destroy_fentry");
+}
+
+void test_cgroup1_hierarchy(void)
+{
+	struct test_cgroup1_hierarchy *skel;
+	__u64 current_cgid;
+	int hid, err;
+
+	skel = test_cgroup1_hierarchy__open();
+	if (!ASSERT_OK_PTR(skel, "open"))
+		return;
+
+	skel->bss->target_pid = getpid();
+
+	err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1");
+	if (!ASSERT_OK(err, "fentry_set_target"))
+		goto destroy;
+
+	err = test_cgroup1_hierarchy__load(skel);
+	if (!ASSERT_OK(err, "load"))
+		goto destroy;
+
+	/* Setup cgroup1 hierarchy */
+	err = setup_classid_environment();
+	if (!ASSERT_OK(err, "setup_classid_environment"))
+		goto destroy;
+
+	err = join_classid();
+	if (!ASSERT_OK(err, "join_cgroup1"))
+		goto cleanup;
+
+	current_cgid = get_classid_cgroup_id();
+	if (!ASSERT_GE(current_cgid, 0, "cgroup1 id"))
+		goto cleanup;
+
+	hid = get_cgroup1_hierarchy_id("net_cls");
+	if (!ASSERT_GE(hid, 0, "cgroup1 id"))
+		goto cleanup;
+	skel->bss->target_hid = hid;
+
+	if (test__start_subtest("test_cgroup1_hierarchy")) {
+		skel->bss->target_ancestor_cgid = current_cgid;
+		bpf_cgroup1(skel);
+	}
+
+	if (test__start_subtest("test_root_cgid")) {
+		skel->bss->target_ancestor_cgid = 1;
+		skel->bss->target_ancestor_level = 0;
+		bpf_cgroup1(skel);
+	}
+
+	if (test__start_subtest("test_invalid_level")) {
+		skel->bss->target_ancestor_cgid = 1;
+		skel->bss->target_ancestor_level = 1;
+		bpf_cgroup1_invalid_id(skel);
+	}
+
+	if (test__start_subtest("test_invalid_cgid")) {
+		skel->bss->target_ancestor_cgid = 0;
+		bpf_cgroup1_invalid_id(skel);
+	}
+
+	if (test__start_subtest("test_invalid_hid")) {
+		skel->bss->target_ancestor_cgid = 1;
+		skel->bss->target_ancestor_level = 0;
+		skel->bss->target_hid = -1;
+		bpf_cgroup1_invalid_id(skel);
+	}
+
+	if (test__start_subtest("test_invalid_cgrp_name")) {
+		skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl");
+		skel->bss->target_ancestor_cgid = current_cgid;
+		bpf_cgroup1_invalid_id(skel);
+	}
+
+	if (test__start_subtest("test_invalid_cgrp_name2")) {
+		skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,");
+		skel->bss->target_ancestor_cgid = current_cgid;
+		bpf_cgroup1_invalid_id(skel);
+	}
+
+	if (test__start_subtest("test_sleepable_prog")) {
+		skel->bss->target_hid = hid;
+		skel->bss->target_ancestor_cgid = current_cgid;
+		bpf_cgroup1_sleepable(skel);
+	}
+
+cleanup:
+	cleanup_classid_environment();
+destroy:
+	test_cgroup1_hierarchy__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index e02feb5fae97..574d9a0cdc8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -4,6 +4,7 @@
 #include <test_progs.h>
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
+#include "iters_css_task.skel.h"
 #include "cgroup_iter.skel.h"
 #include "cgroup_helpers.h"
 
@@ -263,6 +264,35 @@ close_cgrp:
 	close(cgrp_fd);
 }
 
+static void test_walk_self_only_css_task(void)
+{
+	struct iters_css_task *skel;
+	int err;
+
+	skel = iters_css_task__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+	err = iters_css_task__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	err = join_cgroup(cg_path[CHILD2]);
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	snprintf(expected_output, sizeof(expected_output),
+		PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+		BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+	ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+	iters_css_task__destroy(skel);
+}
+
 void test_cgroup_iter(void)
 {
 	struct cgroup_iter *skel = NULL;
@@ -293,6 +323,9 @@ void test_cgroup_iter(void)
 		test_walk_self_only(skel);
 	if (test__start_subtest("cgroup_iter__dead_self_only"))
 		test_walk_dead_self_only(skel);
+	if (test__start_subtest("cgroup_iter__self_only_css_task"))
+		test_walk_self_only_css_task();
+
 out:
 	cgroup_iter__destroy(skel);
 	cleanup_cgroups();
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 9026b42914d3..addf720428f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -71,7 +71,7 @@ void test_cgroup_v1v2(void)
 	}
 	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
 	setup_classid_environment();
-	set_classid(42);
+	set_classid();
 	ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
 	cleanup_classid_environment();
 	close(server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index b696873c5455..bf84d4a1d9ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -1,7 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
 
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
 #include <test_progs.h>
+#include "cgroup_helpers.h"
 
 #include "iters.skel.h"
 #include "iters_state_safety.skel.h"
@@ -9,6 +16,10 @@
 #include "iters_num.skel.h"
 #include "iters_testmod_seq.skel.h"
 #include "iters_task_vma.skel.h"
+#include "iters_task.skel.h"
+#include "iters_css_task.skel.h"
+#include "iters_css.skel.h"
+#include "iters_task_failure.skel.h"
 
 static void subtest_num_iters(void)
 {
@@ -146,11 +157,144 @@ cleanup:
 	iters_task_vma__destroy(skel);
 }
 
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+	pthread_mutex_lock(&do_nothing_mutex);
+	pthread_mutex_unlock(&do_nothing_mutex);
+
+	pthread_exit(arg);
+}
+
+#define thread_num 2
+
+static void subtest_task_iters(void)
+{
+	struct iters_task *skel = NULL;
+	pthread_t thread_ids[thread_num];
+	void *ret;
+	int err;
+
+	skel = iters_task__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto cleanup;
+	skel->bss->target_pid = getpid();
+	err = iters_task__attach(skel);
+	if (!ASSERT_OK(err, "iters_task__attach"))
+		goto cleanup;
+	pthread_mutex_lock(&do_nothing_mutex);
+	for (int i = 0; i < thread_num; i++)
+		ASSERT_OK(pthread_create(&thread_ids[i], NULL, &do_nothing_wait, NULL),
+			"pthread_create");
+
+	syscall(SYS_getpgid);
+	iters_task__detach(skel);
+	ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
+	ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
+	ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
+	pthread_mutex_unlock(&do_nothing_mutex);
+	for (int i = 0; i < thread_num; i++)
+		ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
+cleanup:
+	iters_task__destroy(skel);
+}
+
+extern int stack_mprotect(void);
+
+static void subtest_css_task_iters(void)
+{
+	struct iters_css_task *skel = NULL;
+	int err, cg_fd, cg_id;
+	const char *cgrp_path = "/cg1";
+
+	err = setup_cgroup_environment();
+	if (!ASSERT_OK(err, "setup_cgroup_environment"))
+		goto cleanup;
+	cg_fd = create_and_get_cgroup(cgrp_path);
+	if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup"))
+		goto cleanup;
+	cg_id = get_cgroup_id(cgrp_path);
+	err = join_cgroup(cgrp_path);
+	if (!ASSERT_OK(err, "join_cgroup"))
+		goto cleanup;
+
+	skel = iters_css_task__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	skel->bss->cg_id = cg_id;
+	err = iters_css_task__attach(skel);
+	if (!ASSERT_OK(err, "iters_task__attach"))
+		goto cleanup;
+	err = stack_mprotect();
+	if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+	    !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+		goto cleanup;
+	iters_css_task__detach(skel);
+	ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+
+cleanup:
+	cleanup_cgroup_environment();
+	iters_css_task__destroy(skel);
+}
+
+static void subtest_css_iters(void)
+{
+	struct iters_css *skel = NULL;
+	struct {
+		const char *path;
+		int fd;
+	} cgs[] = {
+		{ "/cg1" },
+		{ "/cg1/cg2" },
+		{ "/cg1/cg2/cg3" },
+		{ "/cg1/cg2/cg3/cg4" },
+	};
+	int err, cg_nr = ARRAY_SIZE(cgs);
+	int i;
+
+	err = setup_cgroup_environment();
+	if (!ASSERT_OK(err, "setup_cgroup_environment"))
+		goto cleanup;
+	for (i = 0; i < cg_nr; i++) {
+		cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+		if (!ASSERT_GE(cgs[i].fd, 0, "create_and_get_cgroup"))
+			goto cleanup;
+	}
+
+	skel = iters_css__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto cleanup;
+
+	skel->bss->target_pid = getpid();
+	skel->bss->root_cg_id = get_cgroup_id(cgs[0].path);
+	skel->bss->leaf_cg_id = get_cgroup_id(cgs[cg_nr - 1].path);
+	err = iters_css__attach(skel);
+
+	if (!ASSERT_OK(err, "iters_task__attach"))
+		goto cleanup;
+
+	syscall(SYS_getpgid);
+	ASSERT_EQ(skel->bss->pre_order_cnt, cg_nr, "pre_order_cnt");
+	ASSERT_EQ(skel->bss->first_cg_id, get_cgroup_id(cgs[0].path), "first_cg_id");
+
+	ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
+	ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
+	ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+	iters_css__detach(skel);
+cleanup:
+	cleanup_cgroup_environment();
+	iters_css__destroy(skel);
+}
+
 void test_iters(void)
 {
 	RUN_TESTS(iters_state_safety);
 	RUN_TESTS(iters_looping);
 	RUN_TESTS(iters);
+	RUN_TESTS(iters_css_task);
 
 	if (env.has_testmod)
 		RUN_TESTS(iters_testmod_seq);
@@ -161,4 +305,11 @@ void test_iters(void)
 		subtest_testmod_seq_iters();
 	if (test__start_subtest("task_vma"))
 		subtest_task_vma_iters();
+	if (test__start_subtest("task"))
+		subtest_task_iters();
+	if (test__start_subtest("css_task"))
+		subtest_css_task_iters();
+	if (test__start_subtest("css"))
+		subtest_css_iters();
+	RUN_TESTS(iters_task_failure);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 69dc31383b78..2fb89de63bd2 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -94,14 +94,8 @@ static struct {
 	{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
 	{ "incorrect_head_off1", "bpf_list_head not found at offset=25" },
 	{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
-	{ "pop_front_off",
-	  "15: (bf) r1 = r6                      ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
-	  "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
-	  "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
-	{ "pop_back_off",
-	  "15: (bf) r1 = r6                      ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
-	  "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
-	  "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
+	{ "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+	{ "pop_back_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
 };
 
 static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg)
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
index b25b870f87ba..e6e50a394472 100644
--- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void)
 	local_kptr_stash__destroy(skel);
 }
 
+static void test_refcount_acquire_without_unstash(void)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+	struct local_kptr_stash *skel;
+	int ret;
+
+	skel = local_kptr_stash__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+				     &opts);
+	ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+	ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+	ASSERT_OK(ret, "stash_refcounted_node run");
+	ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+				     &opts);
+	ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+	ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+	local_kptr_stash__destroy(skel);
+}
+
 static void test_local_kptr_stash_fail(void)
 {
 	RUN_TESTS(local_kptr_stash_fail);
@@ -86,6 +117,8 @@ void test_local_kptr_stash(void)
 		test_local_kptr_stash_plain();
 	if (test__start_subtest("local_kptr_stash_unstash"))
 		test_local_kptr_stash_unstash();
+	if (test__start_subtest("refcount_acquire_without_unstash"))
+		test_refcount_acquire_without_unstash();
 	if (test__start_subtest("local_kptr_stash_fail"))
 		test_local_kptr_stash_fail();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index fe9a23e65ef4..0f7ea4d7d9f6 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -78,7 +78,7 @@ static void obj_load_log_buf(void)
 	ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
 		      "libbpf_log_not_empty");
 	ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
-	ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"),
+	ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"),
 		      "good_log_verbose");
 	ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
 		      "bad_log_not_empty");
@@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void)
 	opts.log_level = 2;
 	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
 			   good_prog_insns, good_prog_insn_cnt, &opts);
-	ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2");
+	ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2");
 	ASSERT_GE(fd, 0, "good_fd2");
 	if (fd >= 0)
 		close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
new file mode 100644
index 000000000000..0c9abd279e18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -0,0 +1,2124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <test_progs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* =================================
+ * SHORT AND CONSISTENT NUMBER TYPES
+ * =================================
+ */
+#define U64_MAX ((u64)UINT64_MAX)
+#define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)UINT_MAX)
+#define S64_MIN ((s64)INT64_MIN)
+#define S64_MAX ((s64)INT64_MAX)
+#define S32_MIN ((s32)INT_MIN)
+#define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)0x80000000)
+#define S16_MAX ((s16)0x7fffffff)
+
+typedef unsigned long long ___u64;
+typedef unsigned int ___u32;
+typedef long long ___s64;
+typedef int ___s32;
+
+/* avoid conflicts with already defined types in kernel headers */
+#define u64 ___u64
+#define u32 ___u32
+#define s64 ___s64
+#define s32 ___s32
+
+/* ==================================
+ * STRING BUF ABSTRACTION AND HELPERS
+ * ==================================
+ */
+struct strbuf {
+	size_t buf_sz;
+	int pos;
+	char buf[0];
+};
+
+#define DEFINE_STRBUF(name, N)						\
+	struct { struct strbuf buf; char data[(N)]; } ___##name;	\
+	struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf)
+
+__printf(2, 3)
+static inline void snappendf(struct strbuf *s, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	s->pos += vsnprintf(s->buf + s->pos,
+			    s->pos < s->buf_sz ? s->buf_sz - s->pos : 0,
+			    fmt, args);
+	va_end(args);
+}
+
+/* ==================================
+ * GENERIC NUMBER TYPE AND OPERATIONS
+ * ==================================
+ */
+enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 };
+
+static __always_inline u64 min_t(enum num_t t, u64 x, u64 y)
+{
+	switch (t) {
+	case U64: return (u64)x < (u64)y ? (u64)x : (u64)y;
+	case U32: return (u32)x < (u32)y ? (u32)x : (u32)y;
+	case S64: return (s64)x < (s64)y ? (s64)x : (s64)y;
+	case S32: return (s32)x < (s32)y ? (s32)x : (s32)y;
+	default: printf("min_t!\n"); exit(1);
+	}
+}
+
+static __always_inline u64 max_t(enum num_t t, u64 x, u64 y)
+{
+	switch (t) {
+	case U64: return (u64)x > (u64)y ? (u64)x : (u64)y;
+	case U32: return (u32)x > (u32)y ? (u32)x : (u32)y;
+	case S64: return (s64)x > (s64)y ? (s64)x : (s64)y;
+	case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y;
+	default: printf("max_t!\n"); exit(1);
+	}
+}
+
+static __always_inline u64 cast_t(enum num_t t, u64 x)
+{
+	switch (t) {
+	case U64: return (u64)x;
+	case U32: return (u32)x;
+	case S64: return (s64)x;
+	case S32: return (u32)(s32)x;
+	default: printf("cast_t!\n"); exit(1);
+	}
+}
+
+static const char *t_str(enum num_t t)
+{
+	switch (t) {
+	case U64: return "u64";
+	case U32: return "u32";
+	case S64: return "s64";
+	case S32: return "s32";
+	default: printf("t_str!\n"); exit(1);
+	}
+}
+
+static enum num_t t_is_32(enum num_t t)
+{
+	switch (t) {
+	case U64: return false;
+	case U32: return true;
+	case S64: return false;
+	case S32: return true;
+	default: printf("t_is_32!\n"); exit(1);
+	}
+}
+
+static enum num_t t_signed(enum num_t t)
+{
+	switch (t) {
+	case U64: return S64;
+	case U32: return S32;
+	case S64: return S64;
+	case S32: return S32;
+	default: printf("t_signed!\n"); exit(1);
+	}
+}
+
+static enum num_t t_unsigned(enum num_t t)
+{
+	switch (t) {
+	case U64: return U64;
+	case U32: return U32;
+	case S64: return U64;
+	case S32: return U32;
+	default: printf("t_unsigned!\n"); exit(1);
+	}
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool num_is_small(enum num_t t, u64 x)
+{
+	switch (t) {
+	case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+	case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+	case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+	case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
+	default: printf("num_is_small!\n"); exit(1);
+	}
+}
+
+static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x)
+{
+	bool is_small = num_is_small(t, x);
+
+	if (is_small) {
+		switch (t) {
+		case U64: return snappendf(sb, "%llu", (u64)x);
+		case U32: return snappendf(sb, "%u", (u32)x);
+		case S64: return snappendf(sb, "%lld", (s64)x);
+		case S32: return snappendf(sb, "%d", (s32)x);
+		default: printf("snprintf_num!\n"); exit(1);
+		}
+	} else {
+		switch (t) {
+		case U64:
+			if (x == U64_MAX)
+				return snappendf(sb, "U64_MAX");
+			else if (x >= U64_MAX - 256)
+				return snappendf(sb, "U64_MAX-%llu", U64_MAX - x);
+			else
+				return snappendf(sb, "%#llx", (u64)x);
+		case U32:
+			if ((u32)x == U32_MAX)
+				return snappendf(sb, "U32_MAX");
+			else if ((u32)x >= U32_MAX - 256)
+				return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x);
+			else
+				return snappendf(sb, "%#x", (u32)x);
+		case S64:
+			if ((s64)x == S64_MAX)
+				return snappendf(sb, "S64_MAX");
+			else if ((s64)x >= S64_MAX - 256)
+				return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x);
+			else if ((s64)x == S64_MIN)
+				return snappendf(sb, "S64_MIN");
+			else if ((s64)x <= S64_MIN + 256)
+				return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN);
+			else
+				return snappendf(sb, "%#llx", (s64)x);
+		case S32:
+			if ((s32)x == S32_MAX)
+				return snappendf(sb, "S32_MAX");
+			else if ((s32)x >= S32_MAX - 256)
+				return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x);
+			else if ((s32)x == S32_MIN)
+				return snappendf(sb, "S32_MIN");
+			else if ((s32)x <= S32_MIN + 256)
+				return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN);
+			else
+				return snappendf(sb, "%#x", (s32)x);
+		default: printf("snprintf_num!\n"); exit(1);
+		}
+	}
+}
+
+/* ===================================
+ * GENERIC RANGE STRUCT AND OPERATIONS
+ * ===================================
+ */
+struct range {
+	u64 a, b;
+};
+
+static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x)
+{
+	if (x.a == x.b)
+		return snprintf_num(t, sb, x.a);
+
+	snappendf(sb, "[");
+	snprintf_num(t, sb, x.a);
+	snappendf(sb, "; ");
+	snprintf_num(t, sb, x.b);
+	snappendf(sb, "]");
+}
+
+static void print_range(enum num_t t, struct range x, const char *sfx)
+{
+	DEFINE_STRBUF(sb, 128);
+
+	snprintf_range(t, sb, x);
+	printf("%s%s", sb->buf, sfx);
+}
+
+static const struct range unkn[] = {
+	[U64] = { 0, U64_MAX },
+	[U32] = { 0, U32_MAX },
+	[S64] = { (u64)S64_MIN, (u64)S64_MAX },
+	[S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX },
+};
+
+static struct range unkn_subreg(enum num_t t)
+{
+	switch (t) {
+	case U64: return unkn[U32];
+	case U32: return unkn[U32];
+	case S64: return unkn[U32];
+	case S32: return unkn[S32];
+	default: printf("unkn_subreg!\n"); exit(1);
+	}
+}
+
+static struct range range(enum num_t t, u64 a, u64 b)
+{
+	switch (t) {
+	case U64: return (struct range){ (u64)a, (u64)b };
+	case U32: return (struct range){ (u32)a, (u32)b };
+	case S64: return (struct range){ (s64)a, (s64)b };
+	case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b };
+	default: printf("range!\n"); exit(1);
+	}
+}
+
+static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; }
+static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; }
+static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); }
+static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; }
+
+static bool range_eq(struct range x, struct range y)
+{
+	return x.a == y.a && x.b == y.b;
+}
+
+static struct range range_cast_to_s32(struct range x)
+{
+	u64 a = x.a, b = x.b;
+
+	/* if upper 32 bits are constant, lower 32 bits should form a proper
+	 * s32 range to be correct
+	 */
+	if (upper32(a) == upper32(b) && (s32)a <= (s32)b)
+		return range(S32, a, b);
+
+	/* Special case where upper bits form a small sequence of two
+	 * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+	 * 0x00000000 is also valid), while lower bits form a proper s32 range
+	 * going from negative numbers to positive numbers.
+	 *
+	 * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating
+	 * over full 64-bit numbers range will form a proper [-16, 16]
+	 * ([0xffffff00; 0x00000010]) range in its lower 32 bits.
+	 */
+	if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0)
+		return range(S32, a, b);
+
+	/* otherwise we can't derive much meaningful information */
+	return unkn[S32];
+}
+
+static struct range range_cast_u64(enum num_t to_t, struct range x)
+{
+	u64 a = (u64)x.a, b = (u64)x.b;
+
+	switch (to_t) {
+	case U64:
+		return x;
+	case U32:
+		if (upper32(a) != upper32(b))
+			return unkn[U32];
+		return range(U32, a, b);
+	case S64:
+		if (sign64(a) != sign64(b))
+			return unkn[S64];
+		return range(S64, a, b);
+	case S32:
+		return range_cast_to_s32(x);
+	default: printf("range_cast_u64!\n"); exit(1);
+	}
+}
+
+static struct range range_cast_s64(enum num_t to_t, struct range x)
+{
+	s64 a = (s64)x.a, b = (s64)x.b;
+
+	switch (to_t) {
+	case U64:
+		/* equivalent to (s64)a <= (s64)b check */
+		if (sign64(a) != sign64(b))
+			return unkn[U64];
+		return range(U64, a, b);
+	case U32:
+		if (upper32(a) != upper32(b) || sign32(a) != sign32(b))
+			return unkn[U32];
+		return range(U32, a, b);
+	case S64:
+		return x;
+	case S32:
+		return range_cast_to_s32(x);
+	default: printf("range_cast_s64!\n"); exit(1);
+	}
+}
+
+static struct range range_cast_u32(enum num_t to_t, struct range x)
+{
+	u32 a = (u32)x.a, b = (u32)x.b;
+
+	switch (to_t) {
+	case U64:
+	case S64:
+		/* u32 is always a valid zero-extended u64/s64 */
+		return range(to_t, a, b);
+	case U32:
+		return x;
+	case S32:
+		return range_cast_to_s32(range(U32, a, b));
+	default: printf("range_cast_u32!\n"); exit(1);
+	}
+}
+
+static struct range range_cast_s32(enum num_t to_t, struct range x)
+{
+	s32 a = (s32)x.a, b = (s32)x.b;
+
+	switch (to_t) {
+	case U64:
+	case U32:
+	case S64:
+		if (sign32(a) != sign32(b))
+			return unkn[to_t];
+		return range(to_t, a, b);
+	case S32:
+		return x;
+	default: printf("range_cast_s32!\n"); exit(1);
+	}
+}
+
+/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving
+ * all possible information. Worst case, it will be unknown range within
+ * *to_t* domain, if nothing more specific can be guaranteed during the
+ * conversion
+ */
+static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from)
+{
+	switch (from_t) {
+	case U64: return range_cast_u64(to_t, from);
+	case U32: return range_cast_u32(to_t, from);
+	case S64: return range_cast_s64(to_t, from);
+	case S32: return range_cast_s32(to_t, from);
+	default: printf("range_cast!\n"); exit(1);
+	}
+}
+
+static bool is_valid_num(enum num_t t, u64 x)
+{
+	switch (t) {
+	case U64: return true;
+	case U32: return upper32(x) == 0;
+	case S64: return true;
+	case S32: return upper32(x) == 0;
+	default: printf("is_valid_num!\n"); exit(1);
+	}
+}
+
+static bool is_valid_range(enum num_t t, struct range x)
+{
+	if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b))
+		return false;
+
+	switch (t) {
+	case U64: return (u64)x.a <= (u64)x.b;
+	case U32: return (u32)x.a <= (u32)x.b;
+	case S64: return (s64)x.a <= (s64)x.b;
+	case S32: return (s32)x.a <= (s32)x.b;
+	default: printf("is_valid_range!\n"); exit(1);
+	}
+}
+
+static struct range range_improve(enum num_t t, struct range old, struct range new)
+{
+	return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b));
+}
+
+static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y)
+{
+	struct range y_cast;
+
+	y_cast = range_cast(y_t, x_t, y);
+
+	/* the case when new range knowledge, *y*, is a 32-bit subregister
+	 * range, while previous range knowledge, *x*, is a full register
+	 * 64-bit range, needs special treatment to take into account upper 32
+	 * bits of full register range
+	 */
+	if (t_is_32(y_t) && !t_is_32(x_t)) {
+		struct range x_swap;
+
+		/* some combinations of upper 32 bits and sign bit can lead to
+		 * invalid ranges, in such cases it's easier to detect them
+		 * after cast/swap than try to enumerate all the conditions
+		 * under which transformation and knowledge transfer is valid
+		 */
+		x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b));
+		if (!is_valid_range(x_t, x_swap))
+			return x;
+		return range_improve(x_t, x, x_swap);
+	}
+
+	/* otherwise, plain range cast and intersection works */
+	return range_improve(x_t, x, y_cast);
+}
+
+/* =======================
+ * GENERIC CONDITIONAL OPS
+ * =======================
+ */
+enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE };
+
+static enum op complement_op(enum op op)
+{
+	switch (op) {
+	case OP_LT: return OP_GE;
+	case OP_LE: return OP_GT;
+	case OP_GT: return OP_LE;
+	case OP_GE: return OP_LT;
+	case OP_EQ: return OP_NE;
+	case OP_NE: return OP_EQ;
+	default: printf("complement_op!\n"); exit(1);
+	}
+}
+
+static const char *op_str(enum op op)
+{
+	switch (op) {
+	case OP_LT: return "<";
+	case OP_LE: return "<=";
+	case OP_GT: return ">";
+	case OP_GE: return ">=";
+	case OP_EQ: return "==";
+	case OP_NE: return "!=";
+	default: printf("op_str!\n"); exit(1);
+	}
+}
+
+/* Can register with range [x.a, x.b] *EVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+#define range_canbe(T) do {									\
+	switch (op) {										\
+	case OP_LT: return (T)x.a < (T)y.b;							\
+	case OP_LE: return (T)x.a <= (T)y.b;							\
+	case OP_GT: return (T)x.b > (T)y.a;							\
+	case OP_GE: return (T)x.b >= (T)y.a;							\
+	case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b);			\
+	case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a);		\
+	default: printf("range_canbe op %d\n", op); exit(1);					\
+	}											\
+} while (0)
+
+	switch (t) {
+	case U64: { range_canbe(u64); }
+	case U32: { range_canbe(u32); }
+	case S64: { range_canbe(s64); }
+	case S32: { range_canbe(s32); }
+	default: printf("range_canbe!\n"); exit(1);
+	}
+#undef range_canbe
+}
+
+/* Does register with range [x.a, x.b] *ALWAYS* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+	/* always op <=> ! canbe complement(op) */
+	return !range_canbe_op(t, x, y, complement_op(op));
+}
+
+/* Does register with range [x.a, x.b] *NEVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+	return !range_canbe_op(t, x, y, op);
+}
+
+/* similar to verifier's is_branch_taken():
+ *    1 - always taken;
+ *    0 - never taken,
+ *   -1 - unsure.
+ */
+static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+	if (range_always_op(t, x, y, op))
+		return 1;
+	if (range_never_op(t, x, y, op))
+		return 0;
+	return -1;
+}
+
+/* What would be the new estimates for register x and y ranges assuming truthful
+ * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy.
+ *
+ * We assume "interesting" cases where ranges overlap. Cases where it's
+ * obvious that (x OP y) is either always true or false should be filtered with
+ * range_never and range_always checks.
+ */
+static void range_cond(enum num_t t, struct range x, struct range y,
+		       enum op op, struct range *newx, struct range *newy)
+{
+	if (!range_canbe_op(t, x, y, op)) {
+		/* nothing to adjust, can't happen, return original values */
+		*newx = x;
+		*newy = y;
+		return;
+	}
+	switch (op) {
+	case OP_LT:
+		*newx = range(t, x.a, min_t(t, x.b, y.b - 1));
+		*newy = range(t, max_t(t, x.a + 1, y.a), y.b);
+		break;
+	case OP_LE:
+		*newx = range(t, x.a, min_t(t, x.b, y.b));
+		*newy = range(t, max_t(t, x.a, y.a), y.b);
+		break;
+	case OP_GT:
+		*newx = range(t, max_t(t, x.a, y.a + 1), x.b);
+		*newy = range(t, y.a, min_t(t, x.b - 1, y.b));
+		break;
+	case OP_GE:
+		*newx = range(t, max_t(t, x.a, y.a), x.b);
+		*newy = range(t, y.a, min_t(t, x.b, y.b));
+		break;
+	case OP_EQ:
+		*newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+		*newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+		break;
+	case OP_NE:
+		/* generic case, can't derive more information */
+		*newx = range(t, x.a, x.b);
+		*newy = range(t, y.a, y.b);
+		break;
+
+		/* below extended logic is not supported by verifier just yet */
+		if (x.a == x.b && x.a == y.a) {
+			/* X is a constant matching left side of Y */
+			*newx = range(t, x.a, x.b);
+			*newy = range(t, y.a + 1, y.b);
+		} else if (x.a == x.b && x.b == y.b) {
+			/* X is a constant matching rigth side of Y */
+			*newx = range(t, x.a, x.b);
+			*newy = range(t, y.a, y.b - 1);
+		} else if (y.a == y.b && x.a == y.a) {
+			/* Y is a constant matching left side of X */
+			*newx = range(t, x.a + 1, x.b);
+			*newy = range(t, y.a, y.b);
+		} else if (y.a == y.b && x.b == y.b) {
+			/* Y is a constant matching rigth side of X */
+			*newx = range(t, x.a, x.b - 1);
+			*newy = range(t, y.a, y.b);
+		} else {
+			/* generic case, can't derive more information */
+			*newx = range(t, x.a, x.b);
+			*newy = range(t, y.a, y.b);
+		}
+
+		break;
+	default:
+		break;
+	}
+}
+
+/* =======================
+ * REGISTER STATE HANDLING
+ * =======================
+ */
+struct reg_state {
+	struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */
+	bool valid;
+};
+
+static void print_reg_state(struct reg_state *r, const char *sfx)
+{
+	DEFINE_STRBUF(sb, 512);
+	enum num_t t;
+	int cnt = 0;
+
+	if (!r->valid) {
+		printf("<not found>%s", sfx);
+		return;
+	}
+
+	snappendf(sb, "scalar(");
+	for (t = first_t; t <= last_t; t++) {
+		snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t));
+		snprintf_range(t, sb, r->r[t]);
+	}
+	snappendf(sb, ")");
+
+	printf("%s%s", sb->buf, sfx);
+}
+
+static void print_refinement(enum num_t s_t, struct range src,
+			     enum num_t d_t, struct range old, struct range new,
+			     const char *ctx)
+{
+	printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t));
+	print_range(s_t, src, "");
+	printf(" (%s)DST_OLD=", t_str(d_t));
+	print_range(d_t, old, "");
+	printf(" (%s)DST_NEW=", t_str(d_t));
+	print_range(d_t, new, "\n");
+}
+
+static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx)
+{
+	enum num_t d_t, s_t;
+	struct range old;
+	bool keep_going = false;
+
+again:
+	/* try to derive new knowledge from just learned range x of type t */
+	for (d_t = first_t; d_t <= last_t; d_t++) {
+		old = r->r[d_t];
+		r->r[d_t] = range_refine(d_t, r->r[d_t], t, x);
+		if (!range_eq(r->r[d_t], old)) {
+			keep_going = true;
+			if (env.verbosity >= VERBOSE_VERY)
+				print_refinement(t, x, d_t, old, r->r[d_t], ctx);
+		}
+	}
+
+	/* now see if we can derive anything new from updated reg_state's ranges */
+	for (s_t = first_t; s_t <= last_t; s_t++) {
+		for (d_t = first_t; d_t <= last_t; d_t++) {
+			old = r->r[d_t];
+			r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]);
+			if (!range_eq(r->r[d_t], old)) {
+				keep_going = true;
+				if (env.verbosity >= VERBOSE_VERY)
+					print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx);
+			}
+		}
+	}
+
+	/* keep refining until we converge */
+	if (keep_going) {
+		keep_going = false;
+		goto again;
+	}
+}
+
+static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val)
+{
+	enum num_t tt;
+
+	rs->valid = true;
+	for (tt = first_t; tt <= last_t; tt++)
+		rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt];
+
+	reg_state_refine(rs, t, rs->r[t], "CONST");
+}
+
+static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op,
+			   struct reg_state *newx, struct reg_state *newy, const char *ctx)
+{
+	char buf[32];
+	enum num_t ts[2];
+	struct reg_state xx = *x, yy = *y;
+	int i, t_cnt;
+	struct range z1, z2;
+
+	if (op == OP_EQ || op == OP_NE) {
+		/* OP_EQ and OP_NE are sign-agnostic, so we need to process
+		 * both signed and unsigned domains at the same time
+		 */
+		ts[0] = t_unsigned(t);
+		ts[1] = t_signed(t);
+		t_cnt = 2;
+	} else {
+		ts[0] = t;
+		t_cnt = 1;
+	}
+
+	for (i = 0; i < t_cnt; i++) {
+		t = ts[i];
+		z1 = x->r[t];
+		z2 = y->r[t];
+
+		range_cond(t, z1, z2, op, &z1, &z2);
+
+		if (newx) {
+			snprintf(buf, sizeof(buf), "%s R1", ctx);
+			reg_state_refine(&xx, t, z1, buf);
+		}
+		if (newy) {
+			snprintf(buf, sizeof(buf), "%s R2", ctx);
+			reg_state_refine(&yy, t, z2, buf);
+		}
+	}
+
+	if (newx)
+		*newx = xx;
+	if (newy)
+		*newy = yy;
+}
+
+static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y,
+				     enum op op)
+{
+	if (op == OP_EQ || op == OP_NE) {
+		/* OP_EQ and OP_NE are sign-agnostic */
+		enum num_t tu = t_unsigned(t);
+		enum num_t ts = t_signed(t);
+		int br_u, br_s, br;
+
+		br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op);
+		br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op);
+
+		if (br_u >= 0 && br_s >= 0 && br_u != br_s)
+			ASSERT_FALSE(true, "branch taken inconsistency!\n");
+
+		/* if 64-bit ranges are indecisive, use 32-bit subranges to
+		 * eliminate always/never taken branches, if possible
+		 */
+		if (br_u == -1 && (t == U64 || t == S64)) {
+			br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op);
+			/* we can only reject for OP_EQ, never take branch
+			 * based on lower 32 bits
+			 */
+			if (op == OP_EQ && br == 0)
+				return 0;
+			/* for OP_NEQ we can be conclusive only if lower 32 bits
+			 * differ and thus inequality branch is always taken
+			 */
+			if (op == OP_NE && br == 1)
+				return 1;
+
+			br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op);
+			if (op == OP_EQ && br == 0)
+				return 0;
+			if (op == OP_NE && br == 1)
+				return 1;
+		}
+
+		return br_u >= 0 ? br_u : br_s;
+	}
+	return range_branch_taken_op(t, x->r[t], y->r[t], op);
+}
+
+/* =====================================
+ * BPF PROGS GENERATION AND VERIFICATION
+ * =====================================
+ */
+struct case_spec {
+	/* whether to init full register (r1) or sub-register (w1) */
+	bool init_subregs;
+	/* whether to establish initial value range on full register (r1) or
+	 * sub-register (w1)
+	 */
+	bool setup_subregs;
+	/* whether to establish initial value range using signed or unsigned
+	 * comparisons (i.e., initialize umin/umax or smin/smax directly)
+	 */
+	bool setup_signed;
+	/* whether to perform comparison on full registers or sub-registers */
+	bool compare_subregs;
+	/* whether to perform comparison using signed or unsigned operations */
+	bool compare_signed;
+};
+
+/* Generate test BPF program based on provided test ranges, operation, and
+ * specifications about register bitness and signedness.
+ */
+static int load_range_cmp_prog(struct range x, struct range y, enum op op,
+			       int branch_taken, struct case_spec spec,
+			       char *log_buf, size_t log_sz,
+			       int *false_pos, int *true_pos)
+{
+#define emit(insn) ({							\
+	struct bpf_insn __insns[] = { insn };				\
+	int __i;							\
+	for (__i = 0; __i < ARRAY_SIZE(__insns); __i++)			\
+		insns[cur_pos + __i] = __insns[__i];			\
+	cur_pos += __i;							\
+})
+#define JMP_TO(target) (target - cur_pos - 1)
+	int cur_pos = 0, exit_pos, fd, op_code;
+	struct bpf_insn insns[64];
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.log_level = 2,
+		.log_buf = log_buf,
+		.log_size = log_sz,
+		.prog_flags = BPF_F_TEST_REG_INVARIANTS,
+	);
+
+	/* ; skip exit block below
+	 * goto +2;
+	 */
+	emit(BPF_JMP_A(2));
+	exit_pos = cur_pos;
+	/* ; exit block for all the preparatory conditionals
+	 * out:
+	 * r0 = 0;
+	 * exit;
+	 */
+	emit(BPF_MOV64_IMM(BPF_REG_0, 0));
+	emit(BPF_EXIT_INSN());
+	/*
+	 * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value
+	 * call bpf_get_current_pid_tgid;
+	 * r6 = r0;               | w6 = w0;
+	 * call bpf_get_current_pid_tgid;
+	 * r7 = r0;               | w7 = w0;
+	 */
+	emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+	if (spec.init_subregs)
+		emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0));
+	else
+		emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0));
+	emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+	if (spec.init_subregs)
+		emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0));
+	else
+		emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+	/* ; setup initial r6/w6 possible value range ([x.a, x.b])
+	 * r1 = %[x.a] ll;        | w1 = %[x.a];
+	 * r2 = %[x.b] ll;        | w2 = %[x.b];
+	 * if r6 < r1 goto out;   | if w6 < w1 goto out;
+	 * if r6 > r2 goto out;   | if w6 > w2 goto out;
+	 */
+	if (spec.setup_subregs) {
+		emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a));
+		emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b));
+		emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+				   BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+		emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+				   BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+	} else {
+		emit(BPF_LD_IMM64(BPF_REG_1, x.a));
+		emit(BPF_LD_IMM64(BPF_REG_2, x.b));
+		emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+				 BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+		emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+				 BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+	}
+	/* ; setup initial r7/w7 possible value range ([y.a, y.b])
+	 * r1 = %[y.a] ll;        | w1 = %[y.a];
+	 * r2 = %[y.b] ll;        | w2 = %[y.b];
+	 * if r7 < r1 goto out;   | if w7 < w1 goto out;
+	 * if r7 > r2 goto out;   | if w7 > w2 goto out;
+	 */
+	if (spec.setup_subregs) {
+		emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a));
+		emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b));
+		emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+				   BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+		emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+				   BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+	} else {
+		emit(BPF_LD_IMM64(BPF_REG_1, y.a));
+		emit(BPF_LD_IMM64(BPF_REG_2, y.b));
+		emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+				 BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+		emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+				 BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+	}
+	/* ; range test instruction
+	 * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3;
+	 */
+	switch (op) {
+	case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break;
+	case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break;
+	case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break;
+	case OP_GE: op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE; break;
+	case OP_EQ: op_code = BPF_JEQ; break;
+	case OP_NE: op_code = BPF_JNE; break;
+	default:
+		printf("unrecognized op %d\n", op);
+		return -ENOTSUP;
+	}
+	/* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * ; this is used for debugging, as verifier doesn't always print
+	 * ; registers states as of condition jump instruction (e.g., when
+	 * ; precision marking happens)
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 */
+	if (spec.compare_subregs) {
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+	} else {
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	}
+	if (spec.compare_subregs)
+		emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+	else
+		emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+	/* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 * exit;
+	 */
+	*false_pos = cur_pos;
+	if (spec.compare_subregs) {
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+	} else {
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	}
+	if (branch_taken == 1) /* false branch is never taken */
+		emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+	else
+		emit(BPF_EXIT_INSN());
+	/* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+	 * r0 = r6;               | w0 = w6;
+	 * r0 = r7;               | w0 = w7;
+	 * exit;
+	 */
+	*true_pos = cur_pos;
+	if (spec.compare_subregs) {
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+	} else {
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+		emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+	}
+	if (branch_taken == 0) /* true branch is never taken */
+		emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+	emit(BPF_EXIT_INSN()); /* last instruction has to be exit */
+
+	fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test",
+			   "GPL", insns, cur_pos, &opts);
+	if (fd < 0)
+		return fd;
+
+	close(fd);
+	return 0;
+#undef emit
+#undef JMP_TO
+}
+
+#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0)
+
+/* Parse register state from verifier log.
+ * `s` should point to the start of "Rx = ..." substring in the verifier log.
+ */
+static int parse_reg_state(const char *s, struct reg_state *reg)
+{
+	/* There are two generic forms for SCALAR register:
+	 * - known constant: R6_rwD=P%lld
+	 * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated
+	 *   list of optional range specifiers:
+	 *     - umin=%llu, if missing, assumed 0;
+	 *     - umax=%llu, if missing, assumed U64_MAX;
+	 *     - smin=%lld, if missing, assumed S64_MIN;
+	 *     - smax=%lld, if missing, assummed S64_MAX;
+	 *     - umin32=%d, if missing, assumed 0;
+	 *     - umax32=%d, if missing, assumed U32_MAX;
+	 *     - smin32=%d, if missing, assumed S32_MIN;
+	 *     - smax32=%d, if missing, assummed S32_MAX;
+	 *     - var_off=(%#llx; %#llx), tnum part, we don't care about it.
+	 *
+	 * If some of the values are equal, they will be grouped (but min/max
+	 * are not mixed together, and similarly negative values are not
+	 * grouped with non-negative ones). E.g.:
+	 *
+	 *   R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000)
+	 *
+	 * _rwD part is optional (and any of the letters can be missing).
+	 * P (precision mark) is optional as well.
+	 *
+	 * Anything inside scalar() is optional, including id, of course.
+	 */
+	struct {
+		const char *pfx;
+		u64 *dst, def;
+		bool is_32, is_set;
+	} *f, fields[8] = {
+		{"smin=", &reg->r[S64].a, S64_MIN},
+		{"smax=", &reg->r[S64].b, S64_MAX},
+		{"umin=", &reg->r[U64].a, 0},
+		{"umax=", &reg->r[U64].b, U64_MAX},
+		{"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+		{"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+		{"umin32=", &reg->r[U32].a, 0,            true},
+		{"umax32=", &reg->r[U32].b, U32_MAX,      true},
+	};
+	const char *p;
+	int i;
+
+	p = strchr(s, '=');
+	if (!p)
+		return -EINVAL;
+	p++;
+	if (*p == 'P')
+		p++;
+
+	if (!str_has_pfx(p, "scalar(")) {
+		long long sval;
+		enum num_t t;
+
+		if (p[0] == '0' && p[1] == 'x') {
+			if (sscanf(p, "%llx", &sval) != 1)
+				return -EINVAL;
+		} else {
+			if (sscanf(p, "%lld", &sval) != 1)
+				return -EINVAL;
+		}
+
+		reg->valid = true;
+		for (t = first_t; t <= last_t; t++) {
+			reg->r[t] = range(t, sval, sval);
+		}
+		return 0;
+	}
+
+	p += sizeof("scalar");
+	while (p) {
+		int midxs[ARRAY_SIZE(fields)], mcnt = 0;
+		u64 val;
+
+		for (i = 0; i < ARRAY_SIZE(fields); i++) {
+			f = &fields[i];
+			if (!str_has_pfx(p, f->pfx))
+				continue;
+			midxs[mcnt++] = i;
+			p += strlen(f->pfx);
+		}
+
+		if (mcnt) {
+			/* populate all matched fields */
+			if (p[0] == '0' && p[1] == 'x') {
+				if (sscanf(p, "%llx", &val) != 1)
+					return -EINVAL;
+			} else {
+				if (sscanf(p, "%lld", &val) != 1)
+					return -EINVAL;
+			}
+
+			for (i = 0; i < mcnt; i++) {
+				f = &fields[midxs[i]];
+				f->is_set = true;
+				*f->dst = f->is_32 ? (u64)(u32)val : val;
+			}
+		} else if (str_has_pfx(p, "var_off")) {
+			/* skip "var_off=(0x0; 0x3f)" part completely */
+			p = strchr(p, ')');
+			if (!p)
+				return -EINVAL;
+			p++;
+		}
+
+		p = strpbrk(p, ",)");
+		if (*p == ')')
+			break;
+		if (p)
+			p++;
+	}
+
+	reg->valid = true;
+
+	for (i = 0; i < ARRAY_SIZE(fields); i++) {
+		f = &fields[i];
+		if (!f->is_set)
+			*f->dst = f->def;
+	}
+
+	return 0;
+}
+
+
+/* Parse all register states (TRUE/FALSE branches and DST/SRC registers)
+ * out of the verifier log for a corresponding test case BPF program.
+ */
+static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
+			       int false_pos, int true_pos,
+			       struct reg_state *false1_reg, struct reg_state *false2_reg,
+			       struct reg_state *true1_reg, struct reg_state *true2_reg)
+{
+	struct {
+		int insn_idx;
+		int reg_idx;
+		const char *reg_upper;
+		struct reg_state *state;
+	} specs[] = {
+		{false_pos,     6, "R6=", false1_reg},
+		{false_pos + 1, 7, "R7=", false2_reg},
+		{true_pos,      6, "R6=", true1_reg},
+		{true_pos + 1,  7, "R7=", true2_reg},
+	};
+	char buf[32];
+	const char *p = log_buf, *q;
+	int i, err;
+
+	for (i = 0; i < 4; i++) {
+		sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx,
+			spec.compare_subregs ? "bc" : "bf",
+			spec.compare_subregs ? "w0" : "r0",
+			spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
+
+		q = strstr(p, buf);
+		if (!q) {
+			*specs[i].state = (struct reg_state){.valid = false};
+			continue;
+		}
+		p = strstr(q, specs[i].reg_upper);
+		if (!p)
+			return -EINVAL;
+		err = parse_reg_state(p, specs[i].state);
+		if (err)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/* Validate ranges match, and print details if they don't */
+static bool assert_range_eq(enum num_t t, struct range x, struct range y,
+			    const char *ctx1, const char *ctx2)
+{
+	DEFINE_STRBUF(sb, 512);
+
+	if (range_eq(x, y))
+		return true;
+
+	snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2);
+	snprintf_range(t, sb, x);
+	snappendf(sb, " != ");
+	snprintf_range(t, sb, y);
+
+	printf("%s\n", sb->buf);
+
+	return false;
+}
+
+/* Validate that register states match, and print details if they don't */
+static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx)
+{
+	bool ok = true;
+	enum num_t t;
+
+	if (r->valid != e->valid) {
+		printf("MISMATCH %s: actual %s != expected %s\n", ctx,
+		       r->valid ? "<valid>" : "<invalid>",
+		       e->valid ? "<valid>" : "<invalid>");
+		return false;
+	}
+
+	if (!r->valid)
+		return true;
+
+	for (t = first_t; t <= last_t; t++) {
+		if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t)))
+			ok = false;
+	}
+
+	return ok;
+}
+
+/* Printf verifier log, filtering out irrelevant noise */
+static void print_verifier_log(const char *buf)
+{
+	const char *p;
+
+	while (buf[0]) {
+		p = strchrnul(buf, '\n');
+
+		/* filter out irrelevant precision backtracking logs */
+		if (str_has_pfx(buf, "mark_precise: "))
+			goto skip_line;
+
+		printf("%.*s\n", (int)(p - buf), buf);
+
+skip_line:
+		buf = *p == '\0' ? p : p + 1;
+	}
+}
+
+/* Simulate provided test case purely with our own range-based logic.
+ * This is done to set up expectations for verifier's branch_taken logic and
+ * verifier's register states in the verifier log.
+ */
+static void sim_case(enum num_t init_t, enum num_t cond_t,
+		     struct range x, struct range y, enum op op,
+		     struct reg_state *fr1, struct reg_state *fr2,
+		     struct reg_state *tr1, struct reg_state *tr2,
+		     int *branch_taken)
+{
+	const u64 A = x.a;
+	const u64 B = x.b;
+	const u64 C = y.a;
+	const u64 D = y.b;
+	struct reg_state rc;
+	enum op rev_op = complement_op(op);
+	enum num_t t;
+
+	fr1->valid = fr2->valid = true;
+	tr1->valid = tr2->valid = true;
+	for (t = first_t; t <= last_t; t++) {
+		/* if we are initializing using 32-bit subregisters,
+		 * full registers get upper 32 bits zeroed automatically
+		 */
+		struct range z = t_is_32(init_t) ? unkn_subreg(t) : unkn[t];
+
+		fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z;
+	}
+
+	/* step 1: r1 >= A, r2 >= C */
+	reg_state_set_const(&rc, init_t, A);
+	reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A");
+	reg_state_set_const(&rc, init_t, C);
+	reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C");
+	*tr1 = *fr1;
+	*tr2 = *fr2;
+	if (env.verbosity >= VERBOSE_VERY) {
+		printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+		printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+	}
+
+	/* step 2: r1 <= B, r2 <= D */
+	reg_state_set_const(&rc, init_t, B);
+	reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B");
+	reg_state_set_const(&rc, init_t, D);
+	reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D");
+	*tr1 = *fr1;
+	*tr2 = *fr2;
+	if (env.verbosity >= VERBOSE_VERY) {
+		printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+		printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+	}
+
+	/* step 3: r1 <op> r2 */
+	*branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op);
+	fr1->valid = fr2->valid = false;
+	tr1->valid = tr2->valid = false;
+	if (*branch_taken != 1) { /* FALSE is possible */
+		fr1->valid = fr2->valid = true;
+		reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE");
+	}
+	if (*branch_taken != 0) { /* TRUE is possible */
+		tr1->valid = tr2->valid = true;
+		reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE");
+	}
+	if (env.verbosity >= VERBOSE_VERY) {
+		printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n");
+		printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n");
+		printf("STEP3 (%s) TRUE  R1:", t_str(cond_t)); print_reg_state(tr1, "\n");
+		printf("STEP3 (%s) TRUE  R2:", t_str(cond_t)); print_reg_state(tr2, "\n");
+	}
+}
+
+/* ===============================
+ * HIGH-LEVEL TEST CASE VALIDATION
+ * ===============================
+ */
+static u32 upper_seeds[] = {
+	0,
+	1,
+	U32_MAX,
+	U32_MAX - 1,
+	S32_MAX,
+	(u32)S32_MIN,
+};
+
+static u32 lower_seeds[] = {
+	0,
+	1,
+	2, (u32)-2,
+	255, (u32)-255,
+	UINT_MAX,
+	UINT_MAX - 1,
+	INT_MAX,
+	(u32)INT_MIN,
+};
+
+struct ctx {
+	int val_cnt, subval_cnt, range_cnt, subrange_cnt;
+	u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+	s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+	u32 usubvals[ARRAY_SIZE(lower_seeds)];
+	s32 ssubvals[ARRAY_SIZE(lower_seeds)];
+	struct range *uranges, *sranges;
+	struct range *usubranges, *ssubranges;
+	int max_failure_cnt, cur_failure_cnt;
+	int total_case_cnt, case_cnt;
+	int rand_case_cnt;
+	unsigned rand_seed;
+	__u64 start_ns;
+	char progress_ctx[64];
+};
+
+static void cleanup_ctx(struct ctx *ctx)
+{
+	free(ctx->uranges);
+	free(ctx->sranges);
+	free(ctx->usubranges);
+	free(ctx->ssubranges);
+}
+
+struct subtest_case {
+	enum num_t init_t;
+	enum num_t cond_t;
+	struct range x;
+	struct range y;
+	enum op op;
+};
+
+static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op)
+{
+	snappendf(sb, "(%s)", t_str(t->init_t));
+	snprintf_range(t->init_t, sb, t->x);
+	snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? op_str(t->op) : "<op>");
+	snprintf_range(t->init_t, sb, t->y);
+}
+
+/* Generate and validate test case based on specific combination of setup
+ * register ranges (including their expected num_t domain), and conditional
+ * operation to perform (including num_t domain in which it has to be
+ * performed)
+ */
+static int verify_case_op(enum num_t init_t, enum num_t cond_t,
+			  struct range x, struct range y, enum op op)
+{
+	char log_buf[256 * 1024];
+	size_t log_sz = sizeof(log_buf);
+	int err, false_pos = 0, true_pos = 0, branch_taken;
+	struct reg_state fr1, fr2, tr1, tr2;
+	struct reg_state fe1, fe2, te1, te2;
+	bool failed = false;
+	struct case_spec spec = {
+		.init_subregs = (init_t == U32 || init_t == S32),
+		.setup_subregs = (init_t == U32 || init_t == S32),
+		.setup_signed = (init_t == S64 || init_t == S32),
+		.compare_subregs = (cond_t == U32 || cond_t == S32),
+		.compare_signed = (cond_t == S64 || cond_t == S32),
+	};
+
+	log_buf[0] = '\0';
+
+	sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken);
+
+	err = load_range_cmp_prog(x, y, op, branch_taken, spec,
+				  log_buf, log_sz, &false_pos, &true_pos);
+	if (err) {
+		ASSERT_OK(err, "load_range_cmp_prog");
+		failed = true;
+	}
+
+	err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos,
+				  &fr1, &fr2, &tr1, &tr2);
+	if (err) {
+		ASSERT_OK(err, "parse_range_cmp_log");
+		failed = true;
+	}
+
+	if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") ||
+	    !assert_reg_state_eq(&fr2, &fe2, "false_reg2") ||
+	    !assert_reg_state_eq(&tr1, &te1, "true_reg1") ||
+	    !assert_reg_state_eq(&tr2, &te2, "true_reg2")) {
+		failed = true;
+	}
+
+	if (failed || env.verbosity >= VERBOSE_NORMAL) {
+		if (failed || env.verbosity >= VERBOSE_VERY) {
+			printf("VERIFIER LOG:\n========================\n");
+			print_verifier_log(log_buf);
+			printf("=====================\n");
+		}
+		printf("ACTUAL   FALSE1: "); print_reg_state(&fr1, "\n");
+		printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n");
+		printf("ACTUAL   FALSE2: "); print_reg_state(&fr2, "\n");
+		printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n");
+		printf("ACTUAL   TRUE1:  "); print_reg_state(&tr1, "\n");
+		printf("EXPECTED TRUE1:  "); print_reg_state(&te1, "\n");
+		printf("ACTUAL   TRUE2:  "); print_reg_state(&tr2, "\n");
+		printf("EXPECTED TRUE2:  "); print_reg_state(&te2, "\n");
+
+		return failed ? -EINVAL : 0;
+	}
+
+	return 0;
+}
+
+/* Given setup ranges and number types, go over all supported operations,
+ * generating individual subtest for each allowed combination
+ */
+static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+			   struct range x, struct range y, bool is_subtest)
+{
+	DEFINE_STRBUF(sb, 256);
+	int err;
+	struct subtest_case sub = {
+		.init_t = init_t,
+		.cond_t = cond_t,
+		.x = x,
+		.y = y,
+	};
+
+	sb->pos = 0; /* reset position in strbuf */
+	subtest_case_str(sb, &sub, false /* ignore op */);
+	if (is_subtest && !test__start_subtest(sb->buf))
+		return 0;
+
+	for (sub.op = first_op; sub.op <= last_op; sub.op++) {
+		sb->pos = 0; /* reset position in strbuf */
+		subtest_case_str(sb, &sub, true /* print op */);
+
+		if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */
+			printf("TEST CASE: %s\n", sb->buf);
+
+		err = verify_case_op(init_t, cond_t, x, y, sub.op);
+		if (err || env.verbosity >= VERBOSE_NORMAL)
+			ASSERT_OK(err, sb->buf);
+		if (err) {
+			ctx->cur_failure_cnt++;
+			if (ctx->cur_failure_cnt > ctx->max_failure_cnt)
+				return err;
+			return 0; /* keep testing other cases */
+		}
+		ctx->case_cnt++;
+		if ((ctx->case_cnt % 10000) == 0) {
+			double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt;
+			u64 elapsed_ns = get_time_ns() - ctx->start_ns;
+			double remain_ns = elapsed_ns / progress * (1 - progress);
+
+			fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), "
+					    "elapsed %llu mins (%.2lf hrs), "
+					    "ETA %.0lf mins (%.2lf hrs)\n",
+				ctx->progress_ctx,
+				ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress,
+				elapsed_ns / 1000000000 / 60,
+				elapsed_ns / 1000000000.0 / 3600,
+				remain_ns / 1000000000.0 / 60,
+				remain_ns / 1000000000.0 / 3600);
+		}
+	}
+
+	return 0;
+}
+
+static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+		       struct range x, struct range y)
+{
+	return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */);
+}
+
+/* ================================
+ * GENERATED CASES FROM SEED VALUES
+ * ================================
+ */
+static int u64_cmp(const void *p1, const void *p2)
+{
+	u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2;
+
+	return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int u32_cmp(const void *p1, const void *p2)
+{
+	u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2;
+
+	return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s64_cmp(const void *p1, const void *p2)
+{
+	s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2;
+
+	return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s32_cmp(const void *p1, const void *p2)
+{
+	s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2;
+
+	return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+/* Generate valid unique constants from seeds, both signed and unsigned */
+static void gen_vals(struct ctx *ctx)
+{
+	int i, j, cnt = 0;
+
+	for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) {
+		for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) {
+			ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j];
+		}
+	}
+
+	/* sort and compact uvals (i.e., it's `sort | uniq`) */
+	qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp);
+	for (i = 1, j = 0; i < cnt; i++) {
+		if (ctx->uvals[j] == ctx->uvals[i])
+			continue;
+		j++;
+		ctx->uvals[j] = ctx->uvals[i];
+	}
+	ctx->val_cnt = j + 1;
+
+	/* we have exactly the same number of s64 values, they are just in
+	 * a different order than u64s, so just sort them differently
+	 */
+	for (i = 0; i < ctx->val_cnt; i++)
+		ctx->svals[i] = ctx->uvals[i];
+	qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp);
+
+	if (env.verbosity >= VERBOSE_SUPER) {
+		DEFINE_STRBUF(sb1, 256);
+		DEFINE_STRBUF(sb2, 256);
+
+		for (i = 0; i < ctx->val_cnt; i++) {
+			sb1->pos = sb2->pos = 0;
+			snprintf_num(U64, sb1, ctx->uvals[i]);
+			snprintf_num(S64, sb2, ctx->svals[i]);
+			printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf);
+		}
+	}
+
+	/* 32-bit values are generated separately */
+	cnt = 0;
+	for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) {
+		ctx->usubvals[cnt++] = lower_seeds[i];
+	}
+
+	/* sort and compact usubvals (i.e., it's `sort | uniq`) */
+	qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp);
+	for (i = 1, j = 0; i < cnt; i++) {
+		if (ctx->usubvals[j] == ctx->usubvals[i])
+			continue;
+		j++;
+		ctx->usubvals[j] = ctx->usubvals[i];
+	}
+	ctx->subval_cnt = j + 1;
+
+	for (i = 0; i < ctx->subval_cnt; i++)
+		ctx->ssubvals[i] = ctx->usubvals[i];
+	qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp);
+
+	if (env.verbosity >= VERBOSE_SUPER) {
+		DEFINE_STRBUF(sb1, 256);
+		DEFINE_STRBUF(sb2, 256);
+
+		for (i = 0; i < ctx->subval_cnt; i++) {
+			sb1->pos = sb2->pos = 0;
+			snprintf_num(U32, sb1, ctx->usubvals[i]);
+			snprintf_num(S32, sb2, ctx->ssubvals[i]);
+			printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf);
+		}
+	}
+}
+
+/* Generate valid ranges from upper/lower seeds */
+static int gen_ranges(struct ctx *ctx)
+{
+	int i, j, cnt = 0;
+
+	for (i = 0; i < ctx->val_cnt; i++) {
+		for (j = i; j < ctx->val_cnt; j++) {
+			if (env.verbosity >= VERBOSE_SUPER) {
+				DEFINE_STRBUF(sb1, 256);
+				DEFINE_STRBUF(sb2, 256);
+
+				sb1->pos = sb2->pos = 0;
+				snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j]));
+				snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j]));
+				printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf);
+			}
+			cnt++;
+		}
+	}
+	ctx->range_cnt = cnt;
+
+	ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges));
+	if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc"))
+		return -EINVAL;
+	ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges));
+	if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc"))
+		return -EINVAL;
+
+	cnt = 0;
+	for (i = 0; i < ctx->val_cnt; i++) {
+		for (j = i; j < ctx->val_cnt; j++) {
+			ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]);
+			ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]);
+			cnt++;
+		}
+	}
+
+	cnt = 0;
+	for (i = 0; i < ctx->subval_cnt; i++) {
+		for (j = i; j < ctx->subval_cnt; j++) {
+			if (env.verbosity >= VERBOSE_SUPER) {
+				DEFINE_STRBUF(sb1, 256);
+				DEFINE_STRBUF(sb2, 256);
+
+				sb1->pos = sb2->pos = 0;
+				snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j]));
+				snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j]));
+				printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf);
+			}
+			cnt++;
+		}
+	}
+	ctx->subrange_cnt = cnt;
+
+	ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges));
+	if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc"))
+		return -EINVAL;
+	ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges));
+	if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc"))
+		return -EINVAL;
+
+	cnt = 0;
+	for (i = 0; i < ctx->subval_cnt; i++) {
+		for (j = i; j < ctx->subval_cnt; j++) {
+			ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]);
+			ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]);
+			cnt++;
+		}
+	}
+
+	return 0;
+}
+
+static int parse_env_vars(struct ctx *ctx)
+{
+	const char *s;
+
+	if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) {
+		errno = 0;
+		ctx->max_failure_cnt = strtol(s, NULL, 10);
+		if (errno || ctx->max_failure_cnt < 0) {
+			ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT");
+			return -EINVAL;
+		}
+	}
+
+	if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) {
+		errno = 0;
+		ctx->rand_case_cnt = strtol(s, NULL, 10);
+		if (errno || ctx->rand_case_cnt < 0) {
+			ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT");
+			return -EINVAL;
+		}
+	}
+
+	if ((s = getenv("REG_BOUNDS_RAND_SEED"))) {
+		errno = 0;
+		ctx->rand_seed = strtoul(s, NULL, 10);
+		if (errno) {
+			ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int prepare_gen_tests(struct ctx *ctx)
+{
+	const char *s;
+	int err;
+
+	if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) {
+		test__skip();
+		return -ENOTSUP;
+	}
+
+	err = parse_env_vars(ctx);
+	if (err)
+		return err;
+
+	gen_vals(ctx);
+	err = gen_ranges(ctx);
+	if (err) {
+		ASSERT_OK(err, "gen_ranges");
+		return err;
+	}
+
+	return 0;
+}
+
+/* Go over generated constants and ranges and validate various supported
+ * combinations of them
+ */
+static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t)
+{
+	struct ctx ctx;
+	struct range rconst;
+	const struct range *ranges;
+	const u64 *vals;
+	int i, j;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto cleanup;
+
+	ranges = init_t == U64 ? ctx.uranges : ctx.sranges;
+	vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals;
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x CONST, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (i = 0; i < ctx.val_cnt; i++) {
+		for (j = 0; j < ctx.range_cnt; j++) {
+			rconst = range(init_t, vals[i], vals[i]);
+
+			/* (u64|s64)(<range> x <const>) */
+			if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+				goto cleanup;
+			/* (u64|s64)(<const> x <range>) */
+			if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+				goto cleanup;
+		}
+	}
+
+cleanup:
+	cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t)
+{
+	struct ctx ctx;
+	struct range rconst;
+	const struct range *ranges;
+	const u32 *vals;
+	int i, j;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto cleanup;
+
+	ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges;
+	vals = init_t == U32 ? ctx.usubvals : (const u32 *)ctx.ssubvals;
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x CONST, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (i = 0; i < ctx.subval_cnt; i++) {
+		for (j = 0; j < ctx.subrange_cnt; j++) {
+			rconst = range(init_t, vals[i], vals[i]);
+
+			/* (u32|s32)(<range> x <const>) */
+			if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+				goto cleanup;
+			/* (u32|s32)(<const> x <range>) */
+			if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+				goto cleanup;
+		}
+	}
+
+cleanup:
+	cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t)
+{
+	struct ctx ctx;
+	const struct range *ranges;
+	int i, j, rcnt;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	if (prepare_gen_tests(&ctx))
+		goto cleanup;
+
+	switch (init_t)
+	{
+	case U64:
+		ranges = ctx.uranges;
+		rcnt = ctx.range_cnt;
+		break;
+	case U32:
+		ranges = ctx.usubranges;
+		rcnt = ctx.subrange_cnt;
+		break;
+	case S64:
+		ranges = ctx.sranges;
+		rcnt = ctx.range_cnt;
+		break;
+	case S32:
+		ranges = ctx.ssubranges;
+		rcnt = ctx.subrange_cnt;
+		break;
+	default:
+		printf("validate_gen_range_vs_range!\n");
+		exit(1);
+	}
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "RANGE x RANGE, %s -> %s",
+		 t_str(init_t), t_str(cond_t));
+
+	for (i = 0; i < rcnt; i++) {
+		for (j = i; j < rcnt; j++) {
+			/* (<range> x <range>) */
+			if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j]))
+				goto cleanup;
+			if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i]))
+				goto cleanup;
+		}
+	}
+
+cleanup:
+	cleanup_ctx(&ctx);
+}
+
+/* Go over thousands of test cases generated from initial seed values.
+ * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If
+ * envvar is not set, this test is skipped during test_progs testing.
+ *
+ * We split this up into smaller subsets based on initialization and
+ * conditiona numeric domains to get an easy parallelization with test_progs'
+ * -j argument.
+ */
+
+/* RANGE x CONST, U64 initial range */
+void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); }
+void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); }
+void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); }
+void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); }
+/* RANGE x CONST, S64 initial range */
+void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); }
+void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); }
+void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); }
+void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); }
+/* RANGE x CONST, U32 initial range */
+void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); }
+void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); }
+void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); }
+void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); }
+/* RANGE x CONST, S32 initial range */
+void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); }
+void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); }
+void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); }
+void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); }
+
+/* RANGE x RANGE, U64 initial range */
+void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); }
+void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); }
+void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); }
+void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); }
+/* RANGE x RANGE, S64 initial range */
+void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); }
+void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); }
+void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); }
+void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); }
+/* RANGE x RANGE, U32 initial range */
+void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); }
+void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); }
+void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); }
+void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); }
+/* RANGE x RANGE, S32 initial range */
+void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); }
+void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); }
+void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); }
+void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); }
+
+#define DEFAULT_RAND_CASE_CNT 100
+
+#define RAND_21BIT_MASK ((1 << 22) - 1)
+
+static u64 rand_u64()
+{
+	/* RAND_MAX is guaranteed to be at least 1<<15, but in practice it
+	 * seems to be 1<<31, so we need to call it thrice to get full u64;
+	 * we'll use rougly equal split: 22 + 21 + 21 bits
+	 */
+	return ((u64)random() << 42) |
+	       (((u64)random() & RAND_21BIT_MASK) << 21) |
+	       (random() & RAND_21BIT_MASK);
+}
+
+static u64 rand_const(enum num_t t)
+{
+	return cast_t(t, rand_u64());
+}
+
+static struct range rand_range(enum num_t t)
+{
+	u64 x = rand_const(t), y = rand_const(t);
+
+	return range(t, min_t(t, x, y), max_t(t, x, y));
+}
+
+static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range)
+{
+	struct ctx ctx;
+	struct range range1, range2;
+	int err, i;
+	u64 t;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	err = parse_env_vars(&ctx);
+	if (err) {
+		ASSERT_OK(err, "parse_env_vars");
+		return;
+	}
+
+	if (ctx.rand_case_cnt == 0)
+		ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT;
+	if (ctx.rand_seed == 0)
+		ctx.rand_seed = (unsigned)get_time_ns();
+
+	srandom(ctx.rand_seed);
+
+	ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt);
+	ctx.start_ns = get_time_ns();
+	snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+		 "[RANDOM SEED %u] RANGE x %s, %s -> %s",
+		 ctx.rand_seed, const_range ? "CONST" : "RANGE",
+		 t_str(init_t), t_str(cond_t));
+
+	for (i = 0; i < ctx.rand_case_cnt; i++) {
+		range1 = rand_range(init_t);
+		if (const_range) {
+			t = rand_const(init_t);
+			range2 = range(init_t, t, t);
+		} else {
+			range2 = rand_range(init_t);
+		}
+
+		/* <range1> x <range2> */
+		if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */))
+			goto cleanup;
+		/* <range2> x <range1> */
+		if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */))
+			goto cleanup;
+	}
+
+cleanup:
+	/* make sure we report random seed for reproducing */
+	ASSERT_TRUE(true, ctx.progress_ctx);
+	cleanup_ctx(&ctx);
+}
+
+/* [RANDOM] RANGE x CONST, U64 initial range */
+void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S64 initial range */
+void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, U32 initial range */
+void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S32 initial range */
+void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); }
+
+/* [RANDOM] RANGE x RANGE, U64 initial range */
+void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S64 initial range */
+void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, U32 initial range */
+void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S32 initial range */
+void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); }
+
+/* A set of hard-coded "interesting" cases to validate as part of normal
+ * test_progs test runs
+ */
+static struct subtest_case crafted_cases[] = {
+	{U64, U64, {0, 0xffffffff}, {0, 0}},
+	{U64, U64, {0, 0x80000000}, {0, 0}},
+	{U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}},
+	{U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}},
+	{U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}},
+	{U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}},
+	{U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}},
+	{U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}},
+
+	/* single point overlap, interesting BPF_EQ and BPF_NE interactions */
+	{U64, U64, {0, 1}, {1, 0x80000000}},
+	{U64, S64, {0, 1}, {1, 0x80000000}},
+	{U64, U32, {0, 1}, {1, 0x80000000}},
+	{U64, S32, {0, 1}, {1, 0x80000000}},
+
+	{U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}},
+	{U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}},
+	{U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
+	{U64, S64, {0, 0xffffffffULL}, {1, 1}},
+	{U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+
+	{U64, U32, {0, 0x100000000}, {0, 0}},
+	{U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+
+	{U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
+	/* these are tricky cases where lower 32 bits allow to tighten 64
+	 * bit boundaries based on tightened lower 32 bit boundaries
+	 */
+	{U64, S32, {0, 0x0ffffffffULL}, {0, 0}},
+	{U64, S32, {0, 0x100000000ULL}, {0, 0}},
+	{U64, S32, {0, 0x100000001ULL}, {0, 0}},
+	{U64, S32, {0, 0x180000000ULL}, {0, 0}},
+	{U64, S32, {0, 0x17fffffffULL}, {0, 0}},
+	{U64, S32, {0, 0x180000001ULL}, {0, 0}},
+
+	/* verifier knows about [-1, 0] range for s32 for this case already */
+	{S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+	/* but didn't know about these cases initially */
+	{U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */
+	{U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */
+
+	/* longer convergence case: learning from u64 -> s64 -> u64 -> u32,
+	 * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX])
+	 */
+	{S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+
+	{U32, U32, {1, U32_MAX}, {0, 0}},
+
+	{U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+
+	{S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}},
+	{S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}},
+	{S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+	{S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+};
+
+/* Go over crafted hard-coded cases. This is fast, so we do it as part of
+ * normal test_progs run.
+ */
+void test_reg_bounds_crafted(void)
+{
+	struct ctx ctx;
+	int i;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) {
+		struct subtest_case *c = &crafted_cases[i];
+
+		verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y);
+		verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x);
+	}
+
+	cleanup_ctx(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 9e6a5e3ed4de..5a4491d4edfe 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <io_uring/mini_liburing.h>
 #include "cgroup_helpers.h"
 
 static char bpf_log_buf[4096];
@@ -38,6 +39,7 @@ static struct sockopt_test {
 	socklen_t			get_optlen_ret;
 
 	enum sockopt_test_error		error;
+	bool				io_uring_support;
 } tests[] = {
 
 	/* ==================== getsockopt ====================  */
@@ -251,7 +253,9 @@ static struct sockopt_test {
 		.attach_type = BPF_CGROUP_GETSOCKOPT,
 		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
 
+		.get_level = SOL_SOCKET,
 		.get_optlen = 64,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "getsockopt: deny bigger ctx->optlen",
@@ -276,6 +280,7 @@ static struct sockopt_test {
 		.get_optlen = 64,
 
 		.error = EFAULT_GETSOCKOPT,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "getsockopt: ignore >PAGE_SIZE optlen",
@@ -318,6 +323,7 @@ static struct sockopt_test {
 		.get_optval = {}, /* the changes are ignored */
 		.get_optlen = PAGE_SIZE + 1,
 		.error = EOPNOTSUPP_GETSOCKOPT,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "getsockopt: support smaller ctx->optlen",
@@ -337,8 +343,10 @@ static struct sockopt_test {
 		.attach_type = BPF_CGROUP_GETSOCKOPT,
 		.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
 
+		.get_level = SOL_SOCKET,
 		.get_optlen = 64,
 		.get_optlen_ret = 32,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "getsockopt: deny writing to ctx->optval",
@@ -518,6 +526,7 @@ static struct sockopt_test {
 		.set_level = 123,
 
 		.set_optlen = 1,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: allow changing ctx->level",
@@ -572,6 +581,7 @@ static struct sockopt_test {
 		.set_optname = 123,
 
 		.set_optlen = 1,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: allow changing ctx->optname",
@@ -624,6 +634,7 @@ static struct sockopt_test {
 		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
 
 		.set_optlen = 64,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: ctx->optlen == -1 is ok",
@@ -640,6 +651,7 @@ static struct sockopt_test {
 		.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
 
 		.set_optlen = 64,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
@@ -658,6 +670,7 @@ static struct sockopt_test {
 		.set_optlen = 4,
 
 		.error = EFAULT_SETSOCKOPT,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: deny ctx->optlen > input optlen",
@@ -675,6 +688,7 @@ static struct sockopt_test {
 		.set_optlen = 64,
 
 		.error = EFAULT_SETSOCKOPT,
+		.io_uring_support = true,
 	},
 	{
 		.descr = "setsockopt: ignore >PAGE_SIZE optlen",
@@ -940,7 +954,89 @@ static int load_prog(const struct bpf_insn *insns,
 	return fd;
 }
 
-static int run_test(int cgroup_fd, struct sockopt_test *test)
+/* Core function that handles io_uring ring initialization,
+ * sending SQE with sockopt command and waiting for the CQE.
+ */
+static int uring_sockopt(int op, int fd, int level, int optname,
+			 const void *optval, socklen_t optlen)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	int err;
+
+	err = io_uring_queue_init(1, &ring, 0);
+	if (!ASSERT_OK(err, "io_uring initialization"))
+		return err;
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!ASSERT_NEQ(sqe, NULL, "Get an SQE")) {
+		err = -1;
+		goto fail;
+	}
+
+	io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen);
+
+	err = io_uring_submit(&ring);
+	if (!ASSERT_EQ(err, 1, "Submit SQE"))
+		goto fail;
+
+	err = io_uring_wait_cqe(&ring, &cqe);
+	if (!ASSERT_OK(err, "Wait for CQE"))
+		goto fail;
+
+	err = cqe->res;
+
+fail:
+	io_uring_queue_exit(&ring);
+
+	return err;
+}
+
+static int uring_setsockopt(int fd, int level, int optname, const void *optval,
+			    socklen_t optlen)
+{
+	return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname,
+			     optval, optlen);
+}
+
+static int uring_getsockopt(int fd, int level, int optname, void *optval,
+			    socklen_t *optlen)
+{
+	int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname,
+				optval, *optlen);
+	if (ret < 0)
+		return ret;
+
+	/* Populate optlen back to be compatible with systemcall interface,
+	 * and simplify the test.
+	 */
+	*optlen = ret;
+
+	return 0;
+}
+
+/* Execute the setsocktopt operation */
+static int call_setsockopt(bool use_io_uring, int fd, int level, int optname,
+			   const void *optval, socklen_t optlen)
+{
+	if (use_io_uring)
+		return uring_setsockopt(fd, level, optname, optval, optlen);
+
+	return setsockopt(fd, level, optname, optval, optlen);
+}
+
+/* Execute the getsocktopt operation */
+static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
+			   void *optval, socklen_t *optlen)
+{
+	if (use_io_uring)
+		return uring_getsockopt(fd, level, optname, optval, optlen);
+
+	return getsockopt(fd, level, optname, optval, optlen);
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
 {
 	int sock_fd, err, prog_fd;
 	void *optval = NULL;
@@ -980,8 +1076,9 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
 			test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
 		}
 
-		err = setsockopt(sock_fd, test->set_level, test->set_optname,
-				 test->set_optval, test->set_optlen);
+		err = call_setsockopt(use_io_uring, sock_fd, test->set_level,
+				      test->set_optname, test->set_optval,
+				      test->set_optlen);
 		if (err) {
 			if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
 				goto close_sock_fd;
@@ -1008,8 +1105,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
 		socklen_t expected_get_optlen = test->get_optlen_ret ?:
 			test->get_optlen;
 
-		err = getsockopt(sock_fd, test->get_level, test->get_optname,
-				 optval, &optlen);
+		err = call_getsockopt(use_io_uring, sock_fd, test->get_level,
+				      test->get_optname, optval, &optlen);
 		if (err) {
 			if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
 				goto free_optval;
@@ -1063,7 +1160,11 @@ void test_sockopt(void)
 		if (!test__start_subtest(tests[i].descr))
 			continue;
 
-		ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
+		ASSERT_OK(run_test(cgroup_fd, &tests[i], false),
+			  tests[i].descr);
+		if (tests[i].io_uring_support)
+			ASSERT_OK(run_test(cgroup_fd, &tests[i], true),
+				  tests[i].descr);
 	}
 
 	close(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index f29c08d93beb..18d451be57c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
 	const char *err_msg;
 } spin_lock_fail_tests[] = {
 	{ "lock_id_kptr_preserve",
-	  "5: (bf) r1 = r0                       ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) "
-	  "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+	  "5: (bf) r1 = r0                       ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
+	  "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
 	  "R1 type=ptr_ expected=percpu_ptr_" },
 	{ "lock_id_global_zero",
-	  "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n"
+	  "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
 	  "R1 type=map_value expected=percpu_ptr_" },
 	{ "lock_id_mapval_preserve",
 	  "[0-9]\\+: (bf) r1 = r0                       ;"
-	  " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)"
-	  " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n"
+	  " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
+	  " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
 	  "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
 	  "R1 type=map_value expected=percpu_ptr_" },
 	{ "lock_id_innermapval_preserve",
 	  "[0-9]\\+: (bf) r1 = r0                      ;"
-	  " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)"
-	  " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n"
+	  " R0=map_value(id=2,ks=4,vs=8)"
+	  " R1_w=map_value(id=2,ks=4,vs=8)\n"
 	  "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
 	  "R1 type=map_value expected=percpu_ptr_" },
 	{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
index 4224727fb364..626d76fe43a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -30,8 +30,15 @@ void test_task_under_cgroup(void)
 	if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
 		goto cleanup;
 
-	ret = test_task_under_cgroup__attach(skel);
-	if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+	/* First, attach the LSM program, and then it will be triggered when the
+	 * TP_BTF program is attached.
+	 */
+	skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+	if (!ASSERT_OK_PTR(skel->links.lsm_run, "attach_lsm"))
+		goto cleanup;
+
+	skel->links.tp_btf_run = bpf_program__attach_trace(skel->progs.tp_btf_run);
+	if (!ASSERT_OK_PTR(skel->links.tp_btf_run, "attach_tp_btf"))
 		goto cleanup;
 
 	pid = fork();
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 67f985f7d215..924d0e25320c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -4,6 +4,10 @@
 #define TC_HELPERS
 #include <test_progs.h>
 
+#ifndef loopback
+# define loopback 1
+#endif
+
 static inline __u32 id_from_prog_fd(int fd)
 {
 	struct bpf_prog_info prog_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
new file mode 100644
index 000000000000..15ee7b2fc410
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -0,0 +1,687 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define netkit_peer "nk0"
+#define netkit_name "nk1"
+
+#define ping_addr_neigh		0x0a000002 /* 10.0.0.2 */
+#define ping_addr_noneigh	0x0a000003 /* 10.0.0.3 */
+
+#include "test_tc_link.skel.h"
+#include "netlink_helpers.h"
+#include "tc_helpers.h"
+
+#define ICMP_ECHO 8
+
+struct icmphdr {
+	__u8		type;
+	__u8		code;
+	__sum16		checksum;
+	struct {
+		__be16	id;
+		__be16	sequence;
+	} echo;
+};
+
+struct iplink_req {
+	struct nlmsghdr  n;
+	struct ifinfomsg i;
+	char             buf[1024];
+};
+
+static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
+			 bool same_netns)
+{
+	struct rtnl_handle rth = { .fd = -1 };
+	struct iplink_req req = {};
+	struct rtattr *linkinfo, *data;
+	const char *type = "netkit";
+	int err;
+
+	err = rtnl_open(&rth, 0);
+	if (!ASSERT_OK(err, "open_rtnetlink"))
+		return err;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	req.n.nlmsg_type = RTM_NEWLINK;
+	req.i.ifi_family = AF_UNSPEC;
+
+	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name,
+		  strlen(netkit_name));
+	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+	addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
+	addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
+	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+	addattr_nest_end(&req.n, data);
+	addattr_nest_end(&req.n, linkinfo);
+
+	err = rtnl_talk(&rth, &req.n, NULL);
+	ASSERT_OK(err, "talk_rtnetlink");
+	rtnl_close(&rth);
+	*ifindex = if_nametoindex(netkit_name);
+
+	ASSERT_GT(*ifindex, 0, "retrieve_ifindex");
+	ASSERT_OK(system("ip netns add foo"), "create netns");
+	ASSERT_OK(system("ip link set dev " netkit_name " up"),
+			 "up primary");
+	ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
+			 "addr primary");
+	if (same_netns) {
+		ASSERT_OK(system("ip link set dev " netkit_peer " up"),
+				 "up peer");
+		ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
+				 "addr peer");
+	} else {
+		ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+				 "move peer");
+		ASSERT_OK(system("ip netns exec foo ip link set dev "
+				 netkit_peer " up"), "up peer");
+		ASSERT_OK(system("ip netns exec foo ip addr add dev "
+				 netkit_peer " 10.0.0.2/24"), "addr peer");
+	}
+	return err;
+}
+
+static void destroy_netkit(void)
+{
+	ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
+	ASSERT_OK(system("ip netns del foo"), "delete netns");
+	ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex");
+}
+
+static int __send_icmp(__u32 dest)
+{
+	struct sockaddr_in addr;
+	struct icmphdr icmp;
+	int sock, ret;
+
+	ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
+	if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
+		return ret;
+
+	sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+	if (!ASSERT_GE(sock, 0, "icmp_socket"))
+		return -errno;
+
+	ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+			 netkit_name, strlen(netkit_name) + 1);
+	if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
+		goto out;
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(dest);
+
+	memset(&icmp, 0, sizeof(icmp));
+	icmp.type = ICMP_ECHO;
+	icmp.echo.id = 1234;
+	icmp.echo.sequence = 1;
+
+	ret = sendto(sock, &icmp, sizeof(icmp), 0,
+		     (struct sockaddr *)&addr, sizeof(addr));
+	if (!ASSERT_GE(ret, 0, "icmp_sendto"))
+		ret = -errno;
+	else
+		ret = 0;
+out:
+	close(sock);
+	return ret;
+}
+
+static int send_icmp(void)
+{
+	return __send_icmp(ping_addr_neigh);
+}
+
+void serial_test_tc_netkit_basic(void)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, optq);
+	LIBBPF_OPTS(bpf_netkit_opts, optl);
+	__u32 prog_ids[2], link_ids[2];
+	__u32 pid1, pid2, lid1, lid2;
+	struct test_tc_link *skel;
+	struct bpf_link *link;
+	int err, ifindex;
+
+	err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, false);
+	if (err)
+		return;
+
+	skel = test_tc_link__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+		  BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+		  BPF_NETKIT_PEER), 0, "tc2_attach_type");
+
+	err = test_tc_link__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+	pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+	ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+	ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc1 = link;
+
+	lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+	optq.prog_ids = prog_ids;
+	optq.link_ids = link_ids;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc2 = link;
+
+	lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+	ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1);
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+	destroy_netkit();
+}
+
+static void serial_test_tc_netkit_multi_links_target(int mode, int target)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, optq);
+	LIBBPF_OPTS(bpf_netkit_opts, optl);
+	__u32 prog_ids[3], link_ids[3];
+	__u32 pid1, pid2, lid1, lid2;
+	struct test_tc_link *skel;
+	struct bpf_link *link;
+	int err, ifindex;
+
+	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, false);
+	if (err)
+		return;
+
+	skel = test_tc_link__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+		  target), 0, "tc1_attach_type");
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+		  target), 0, "tc2_attach_type");
+
+	err = test_tc_link__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+	pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+	ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+
+	ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc1 = link;
+
+	lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+	assert_mprog_count_ifindex(ifindex, target, 1);
+
+	optq.prog_ids = prog_ids;
+	optq.link_ids = link_ids;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, target, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	LIBBPF_OPTS_RESET(optl,
+		.flags = BPF_F_BEFORE,
+		.relative_fd = bpf_program__fd(skel->progs.tc1),
+	);
+
+	link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc2 = link;
+
+	lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+	ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, target, 2);
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, target, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 2, "count");
+	ASSERT_EQ(optq.revision, 3, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+	ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+	ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+	destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_links(void)
+{
+	serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+	serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+	serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER);
+	serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
+{
+	LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+	LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+	LIBBPF_OPTS(bpf_prog_query_opts, optq);
+	__u32 pid1, pid2, fd1, fd2;
+	__u32 prog_ids[3];
+	struct test_tc_link *skel;
+	int err, ifindex;
+
+	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, false);
+	if (err)
+		return;
+
+	skel = test_tc_link__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
+		goto cleanup;
+
+	fd1 = bpf_program__fd(skel->progs.tc1);
+	fd2 = bpf_program__fd(skel->progs.tc2);
+
+	pid1 = id_from_prog_fd(fd1);
+	pid2 = id_from_prog_fd(fd2);
+
+	ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+
+	ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+	if (!ASSERT_EQ(err, 0, "prog_attach"))
+		goto cleanup;
+
+	assert_mprog_count_ifindex(ifindex, target, 1);
+
+	optq.prog_ids = prog_ids;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, target, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup_fd1;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	LIBBPF_OPTS_RESET(opta,
+		.flags = BPF_F_BEFORE,
+		.relative_fd = fd1,
+	);
+
+	err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+	if (!ASSERT_EQ(err, 0, "prog_attach"))
+		goto cleanup_fd1;
+
+	assert_mprog_count_ifindex(ifindex, target, 2);
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, target, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup_fd2;
+
+	ASSERT_EQ(optq.count, 2, "count");
+	ASSERT_EQ(optq.revision, 3, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+	ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+	ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_fd2:
+	err = bpf_prog_detach_opts(fd2, ifindex, target, &optd);
+	ASSERT_OK(err, "prog_detach");
+	assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup_fd1:
+	err = bpf_prog_detach_opts(fd1, ifindex, target, &optd);
+	ASSERT_OK(err, "prog_detach");
+	assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+	destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_opts(void)
+{
+	serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+	serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+	serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER);
+	serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+void serial_test_tc_netkit_device(void)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, optq);
+	LIBBPF_OPTS(bpf_netkit_opts, optl);
+	__u32 prog_ids[2], link_ids[2];
+	__u32 pid1, pid2, lid1;
+	struct test_tc_link *skel;
+	struct bpf_link *link;
+	int err, ifindex, ifindex2;
+
+	err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, true);
+	if (err)
+		return;
+
+	ifindex2 = if_nametoindex(netkit_peer);
+	ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+	skel = test_tc_link__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+		  BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+		  BPF_NETKIT_PEER), 0, "tc2_attach_type");
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3,
+		  BPF_NETKIT_PRIMARY), 0, "tc3_attach_type");
+
+	err = test_tc_link__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+	pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+	ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+	ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc1 = link;
+
+	lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+	optq.prog_ids = prog_ids;
+	optq.link_ids = link_ids;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq);
+	ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+	err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq);
+	ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+	link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl);
+	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl);
+	if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+		bpf_link__destroy(link);
+		goto cleanup;
+	}
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+	assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+	destroy_netkit();
+}
+
+static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, optq);
+	LIBBPF_OPTS(bpf_netkit_opts, optl);
+	__u32 prog_ids[2], link_ids[2];
+	__u32 pid1, lid1;
+	struct test_tc_link *skel;
+	struct bpf_link *link;
+	int err, ifindex;
+
+	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, false);
+	if (err)
+		return;
+
+	skel = test_tc_link__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+		  BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+
+	err = test_tc_link__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+
+	ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+
+	link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc1 = link;
+
+	lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+	assert_mprog_count_ifindex(ifindex, target, 1);
+
+	optq.prog_ids = prog_ids;
+	optq.link_ids = link_ids;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	memset(link_ids, 0, sizeof(link_ids));
+	optq.count = ARRAY_SIZE(prog_ids);
+
+	err = bpf_prog_query_opts(ifindex, target, &optq);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup;
+
+	ASSERT_EQ(optq.count, 1, "count");
+	ASSERT_EQ(optq.revision, 2, "revision");
+	ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+	ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+	ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+	tc_skel_reset_all_seen(skel);
+	ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth");
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex, target, 0);
+	destroy_netkit();
+}
+
+void serial_test_tc_netkit_neigh_links(void)
+{
+	serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+	serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index ca506d2fcf58..196abf223465 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void)
 	const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
 	LIBBPF_OPTS(bpf_prog_load_opts, opts);
 	const size_t log_buf_sz = 256;
-	char *log_buf;
+	char log_buf[log_buf_sz];
 	int fd = -1;
 
-	log_buf = malloc(log_buf_sz);
-	if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
-		return fd;
 	opts.log_buf = log_buf;
 	opts.log_size = log_buf_sz;
 
@@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void)
 			   prog_insns, prog_insn_cnt, &opts);
 	ASSERT_STREQ(log_buf, "", "log_0");
 	ASSERT_GE(fd, 0, "prog_fd");
-	free(log_buf);
 	return fd;
 }
 
@@ -2471,7 +2467,7 @@ static void test_tc_opts_query_target(int target)
 	__u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
 	struct test_tc_link *skel;
 	union bpf_attr attr;
-	__u32 prog_ids[5];
+	__u32 prog_ids[10];
 	int err;
 
 	skel = test_tc_link__open_and_load();
@@ -2599,6 +2595,135 @@ static void test_tc_opts_query_target(int target)
 	ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
 	ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
 
+	/* Test 3: Query with smaller prog_ids array */
+	memset(&attr, 0, attr_size);
+	attr.query.target_ifindex = loopback;
+	attr.query.attach_type = target;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	attr.query.prog_ids = ptr_to_u64(prog_ids);
+	attr.query.count = 2;
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	ASSERT_EQ(err, -1, "prog_query_should_fail");
+	ASSERT_EQ(errno, ENOSPC, "prog_query_should_fail");
+
+	ASSERT_EQ(attr.query.count, 4, "count");
+	ASSERT_EQ(attr.query.revision, 5, "revision");
+	ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+	ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+	ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+	ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+	ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+	ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+	ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+	ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+	ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+	ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+	ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+	ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+	ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+	/* Test 4: Query with larger prog_ids array */
+	memset(&attr, 0, attr_size);
+	attr.query.target_ifindex = loopback;
+	attr.query.attach_type = target;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	attr.query.prog_ids = ptr_to_u64(prog_ids);
+	attr.query.count = 10;
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup4;
+
+	ASSERT_EQ(attr.query.count, 4, "count");
+	ASSERT_EQ(attr.query.revision, 5, "revision");
+	ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+	ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+	ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+	ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+	ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+	ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+	ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+	ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+	ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+	ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+	ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+	ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+	ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+	/* Test 5: Query with NULL prog_ids array but with count > 0 */
+	memset(&attr, 0, attr_size);
+	attr.query.target_ifindex = loopback;
+	attr.query.attach_type = target;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	attr.query.count = sizeof(prog_ids);
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup4;
+
+	ASSERT_EQ(attr.query.count, 4, "count");
+	ASSERT_EQ(attr.query.revision, 5, "revision");
+	ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+	ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+	ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+	ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+	ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+	ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+	ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+	ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+	ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+	ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+	ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+	ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+	/* Test 6: Query with non-NULL prog_ids array but with count == 0 */
+	memset(&attr, 0, attr_size);
+	attr.query.target_ifindex = loopback;
+	attr.query.attach_type = target;
+
+	memset(prog_ids, 0, sizeof(prog_ids));
+	attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	if (!ASSERT_OK(err, "prog_query"))
+		goto cleanup4;
+
+	ASSERT_EQ(attr.query.count, 4, "count");
+	ASSERT_EQ(attr.query.revision, 5, "revision");
+	ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+	ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+	ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+	ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+	ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+	ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+	ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+	ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+	ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+	ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+	ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+	ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+	ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+	/* Test 7: Query with invalid flags */
+	attr.query.attach_flags = 0;
+	attr.query.query_flags = 1;
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	ASSERT_EQ(err, -1, "prog_query_should_fail");
+	ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+	attr.query.attach_flags = 1;
+	attr.query.query_flags = 0;
+
+	err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+	ASSERT_EQ(err, -1, "prog_query_should_fail");
+	ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
 cleanup4:
 	err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
 	ASSERT_OK(err, "prog_detach");
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 6ee22c3b251a..518f143c5b0f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -24,6 +24,7 @@
 
 #include "test_progs.h"
 #include "network_helpers.h"
+#include "netlink_helpers.h"
 #include "test_tc_neigh_fib.skel.h"
 #include "test_tc_neigh.skel.h"
 #include "test_tc_peer.skel.h"
@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
 	}
 }
 
+enum dev_mode {
+	MODE_VETH,
+	MODE_NETKIT,
+};
+
 struct netns_setup_result {
-	int ifindex_veth_src;
-	int ifindex_veth_src_fwd;
-	int ifindex_veth_dst;
-	int ifindex_veth_dst_fwd;
+	enum dev_mode dev_mode;
+	int ifindex_src;
+	int ifindex_src_fwd;
+	int ifindex_dst;
+	int ifindex_dst_fwd;
 };
 
 static int get_ifaddr(const char *name, char *ifaddr)
@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
 	return 0;
 }
 
+static int create_netkit(int mode, char *prim, char *peer)
+{
+	struct rtattr *linkinfo, *data, *peer_info;
+	struct rtnl_handle rth = { .fd = -1 };
+	const char *type = "netkit";
+	struct {
+		struct nlmsghdr n;
+		struct ifinfomsg i;
+		char buf[1024];
+	} req = {};
+	int err;
+
+	err = rtnl_open(&rth, 0);
+	if (!ASSERT_OK(err, "open_rtnetlink"))
+		return err;
+
+	memset(&req, 0, sizeof(req));
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	req.n.nlmsg_type = RTM_NEWLINK;
+	req.i.ifi_family = AF_UNSPEC;
+
+	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+	linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+	addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+	data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+	peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+	req.n.nlmsg_len += sizeof(struct ifinfomsg);
+	addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+	addattr_nest_end(&req.n, peer_info);
+	addattr_nest_end(&req.n, data);
+	addattr_nest_end(&req.n, linkinfo);
+
+	err = rtnl_talk(&rth, &req.n, NULL);
+	ASSERT_OK(err, "talk_rtnetlink");
+	rtnl_close(&rth);
+	return err;
+}
+
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
 	struct nstoken *nstoken = NULL;
-	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-
-	SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
-	SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
+	char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+	int err;
 
-	SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
-	SYS(fail, "ip link set veth_dst address " MAC_DST);
+	if (result->dev_mode == MODE_VETH) {
+		SYS(fail, "ip link add src type veth peer name src_fwd");
+		SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+		SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+		SYS(fail, "ip link set dst address " MAC_DST);
+	} else if (result->dev_mode == MODE_NETKIT) {
+		err = create_netkit(NETKIT_L3, "src", "src_fwd");
+		if (!ASSERT_OK(err, "create_ifindex_src"))
+			goto fail;
+		err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+		if (!ASSERT_OK(err, "create_ifindex_dst"))
+			goto fail;
+	}
 
-	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+	if (get_ifaddr("src_fwd", src_fwd_addr))
 		goto fail;
 
-	result->ifindex_veth_src = if_nametoindex("veth_src");
-	if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+	result->ifindex_src = if_nametoindex("src");
+	if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
 		goto fail;
 
-	result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
-	if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+	result->ifindex_src_fwd = if_nametoindex("src_fwd");
+	if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
 		goto fail;
 
-	result->ifindex_veth_dst = if_nametoindex("veth_dst");
-	if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+	result->ifindex_dst = if_nametoindex("dst");
+	if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
 		goto fail;
 
-	result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
-	if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
+	result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+	if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
 		goto fail;
 
-	SYS(fail, "ip link set veth_src netns " NS_SRC);
-	SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
-	SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
-	SYS(fail, "ip link set veth_dst netns " NS_DST);
+	SYS(fail, "ip link set src netns " NS_SRC);
+	SYS(fail, "ip link set src_fwd netns " NS_FWD);
+	SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+	SYS(fail, "ip link set dst netns " NS_DST);
 
 	/** setup in 'src' namespace */
 	nstoken = open_netns(NS_SRC);
 	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto fail;
 
-	SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
-	SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
-	SYS(fail, "ip link set dev veth_src up");
+	SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+	SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+	SYS(fail, "ip link set dev src up");
 
-	SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
-	SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
-	SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
+	SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+	SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+	SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
 
-	SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
-	    veth_src_fwd_addr);
-	SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
-	    veth_src_fwd_addr);
+	if (result->dev_mode == MODE_VETH) {
+		SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+		    src_fwd_addr);
+		SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+		    src_fwd_addr);
+	}
 
 	close_netns(nstoken);
 
@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	 * needs v4 one in order to start ARP probing. IP4_NET route is added
 	 * to the endpoints so that the ARP processing will reply.
 	 */
-	SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
-	SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
-	SYS(fail, "ip link set dev veth_src_fwd up");
-	SYS(fail, "ip link set dev veth_dst_fwd up");
+	SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+	SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+	SYS(fail, "ip link set dev src_fwd up");
+	SYS(fail, "ip link set dev dst_fwd up");
 
-	SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
-	SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
-	SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
-	SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+	SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+	SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+	SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+	SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
 
 	close_netns(nstoken);
 
@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
 		goto fail;
 
-	SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
-	SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
-	SYS(fail, "ip link set dev veth_dst up");
+	SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+	SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+	SYS(fail, "ip link set dev dst up");
 
-	SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
-	SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
-	SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
+	SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+	SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+	SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
 
-	SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-	SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	if (result->dev_mode == MODE_VETH) {
+		SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+		SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+	}
 
 	close_netns(nstoken);
 
@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
 			  const struct bpf_program *chk_prog,
 			  const struct netns_setup_result *setup_result)
 {
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
 	int err;
 
-	/* tc qdisc add dev veth_src_fwd clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-	/* tc filter add dev veth_src_fwd ingress bpf da src_prog */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
-	/* tc filter add dev veth_src_fwd egress bpf da chk_prog */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+	/* tc qdisc add dev src_fwd clsact */
+	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+	/* tc filter add dev src_fwd ingress bpf da src_prog */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+	/* tc filter add dev src_fwd egress bpf da chk_prog */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
-	/* tc qdisc add dev veth_dst_fwd clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-	/* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
-	/* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+	/* tc qdisc add dev dst_fwd clsact */
+	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+	/* tc filter add dev dst_fwd ingress bpf da dst_prog */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+	/* tc filter add dev dst_fwd egress bpf da chk_prog */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
 
 	return 0;
 fail:
@@ -539,10 +600,10 @@ done:
 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
 				const struct netns_setup_result *setup_result)
 {
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
 	struct nstoken *nstoken;
 	int err;
 
@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
 	nstoken = open_netns(NS_SRC);
 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
 		return -1;
-	/* tc qdisc add dev veth_src clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
-	/* tc filter add dev veth_src ingress bpf da ingress_host */
-	XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-	/* tc filter add dev veth_src egress bpf da egress_host */
-	XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+	/* tc qdisc add dev src clsact */
+	QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+	/* tc filter add dev src ingress bpf da ingress_host */
+	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+	/* tc filter add dev src egress bpf da egress_host */
+	XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
 	close_netns(nstoken);
 
 	/* setup ns_dst tc progs */
 	nstoken = open_netns(NS_DST);
 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
 		return -1;
-	/* tc qdisc add dev veth_dst clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
-	/* tc filter add dev veth_dst ingress bpf da ingress_host */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
-	/* tc filter add dev veth_dst egress bpf da egress_host */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+	/* tc qdisc add dev dst clsact */
+	QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+	/* tc filter add dev dst ingress bpf da ingress_host */
+	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+	/* tc filter add dev dst egress bpf da egress_host */
+	XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
 	close_netns(nstoken);
 
 	/* setup ns_fwd tc progs */
 	nstoken = open_netns(NS_FWD);
 	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
 		return -1;
-	/* tc qdisc add dev veth_dst_fwd clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-	/* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+	/* tc qdisc add dev dst_fwd clsact */
+	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+	/* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
 			  skel->progs.ingress_fwdns_prio100, 100);
-	/* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+	/* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
 			  skel->progs.ingress_fwdns_prio101, 101);
-	/* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+	/* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
 			  skel->progs.egress_fwdns_prio100, 100);
-	/* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+	/* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
 			  skel->progs.egress_fwdns_prio101, 101);
 
-	/* tc qdisc add dev veth_src_fwd clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
-	/* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+	/* tc qdisc add dev src_fwd clsact */
+	QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+	/* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
 			  skel->progs.ingress_fwdns_prio100, 100);
-	/* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+	/* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
 			  skel->progs.ingress_fwdns_prio101, 101);
-	/* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+	/* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
 			  skel->progs.egress_fwdns_prio100, 100);
-	/* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
-	XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+	/* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+	XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
 			  skel->progs.egress_fwdns_prio101, 101);
 	close_netns(nstoken);
 	return 0;
@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
 	if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
 		return;
 
-	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
 	err = test_tc_dtime__load(skel);
 	if (!ASSERT_OK(err, "test_tc_dtime__load"))
@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
 		goto done;
 
-	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
 	err = test_tc_neigh__load(skel);
 	if (!ASSERT_OK(err, "test_tc_neigh__load"))
@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
 		goto done;
 
-	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
-	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
 	err = test_tc_peer__load(skel);
 	if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 {
 	LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
-	LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+	LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
 	struct test_tc_peer *skel = NULL;
 	struct nstoken *nstoken = NULL;
 	int err;
@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 		goto fail;
 
 	skel->rodata->IFINDEX_SRC = ifindex;
-	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
 
 	err = test_tc_peer__load(skel);
 	if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 
 	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
 	 * towards dst, and "tc_dst" to redirect packets
-	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+	 * and "tc_chk" on dst_fwd to drop non-redirected packets.
 	 */
 	/* tc qdisc add dev tun_fwd clsact */
 	QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
 	/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
 	XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
 
-	/* tc qdisc add dev veth_dst_fwd clsact */
-	QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
-	/* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
-	/* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
-	XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+	/* tc qdisc add dev dst_fwd clsact */
+	QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+	/* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+	/* tc filter add dev dst_fwd egress bpf da tc_chk */
+	XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
 
 	/* Setup route and neigh tables */
 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 	SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
 	SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
 
-	SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
 	SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
 	    " dev tun_src scope global");
-	SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
-	SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+	SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
 	SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
 	    " dev tun_src scope global");
-	SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+	SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
 
-	SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
-	SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+	SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
 
 	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
 		goto fail;
@@ -1106,9 +1167,9 @@ fail:
 		close_netns(nstoken);
 }
 
-#define RUN_TEST(name)                                                                      \
+#define RUN_TEST(name, mode)                                                                \
 	({                                                                                  \
-		struct netns_setup_result setup_result;                                     \
+		struct netns_setup_result setup_result = { .dev_mode = mode, };             \
 		if (test__start_subtest(#name))                                             \
 			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
 				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
 {
 	netns_setup_namespaces_nofail("delete");
 
-	RUN_TEST(tc_redirect_peer);
-	RUN_TEST(tc_redirect_peer_l3);
-	RUN_TEST(tc_redirect_neigh);
-	RUN_TEST(tc_redirect_neigh_fib);
-	RUN_TEST(tc_redirect_dtime);
+	RUN_TEST(tc_redirect_peer, MODE_VETH);
+	RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+	RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+	RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+	RUN_TEST(tc_redirect_neigh, MODE_VETH);
+	RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+	RUN_TEST(tc_redirect_dtime, MODE_VETH);
 	return NULL;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
index 0cca4e8ae38e..d3491a84b3b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -9,9 +9,10 @@
 
 #include "test_bpf_ma.skel.h"
 
-void test_test_bpf_ma(void)
+static void do_bpf_ma_test(const char *name)
 {
 	struct test_bpf_ma *skel;
+	struct bpf_program *prog;
 	struct btf *btf;
 	int i, err;
 
@@ -34,6 +35,11 @@ void test_test_bpf_ma(void)
 		skel->rodata->data_btf_ids[i] = id;
 	}
 
+	prog = bpf_object__find_program_by_name(skel->obj, name);
+	if (!ASSERT_OK_PTR(prog, "invalid prog name"))
+		goto out;
+	bpf_program__set_autoload(prog, true);
+
 	err = test_bpf_ma__load(skel);
 	if (!ASSERT_OK(err, "load"))
 		goto out;
@@ -48,3 +54,15 @@ void test_test_bpf_ma(void)
 out:
 	test_bpf_ma__destroy(skel);
 }
+
+void test_test_bpf_ma(void)
+{
+	if (test__start_subtest("batch_alloc_free"))
+		do_bpf_ma_test("test_batch_alloc_free");
+	if (test__start_subtest("free_through_map_free"))
+		do_bpf_ma_test("test_free_through_map_free");
+	if (test__start_subtest("batch_percpu_alloc_free"))
+		do_bpf_ma_test("test_batch_percpu_alloc_free");
+	if (test__start_subtest("percpu_free_through_map_free"))
+		do_bpf_ma_test("test_percpu_free_through_map_free");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index 214d9f4a94a5..ea933fd151c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -8,7 +8,8 @@
 #include <sys/types.h>
 #include <test_progs.h>
 
-#define TDIR "/sys/kernel/debug"
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
 
 static int read_iter(char *file)
 {
@@ -43,8 +44,11 @@ static int fn(void)
 	if (!ASSERT_OK(err, "mount /"))
 		goto out;
 
-	err = umount(TDIR);
-	if (!ASSERT_OK(err, "umount " TDIR))
+	err =  mkdir(TDIR, 0777);
+	/* If the directory already exists we can carry on. It may be left over
+	 * from a previous run.
+	 */
+	if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
 		goto out;
 
 	err = mount("none", TDIR, "tmpfs", 0, NULL);
@@ -138,6 +142,7 @@ out:
 	rmdir(TDIR "/fs1");
 	rmdir(TDIR "/fs2");
 	umount(TDIR);
+	rmdir(TDIR);
 	exit(err);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e3e68c97b40c..5cfa7a6316b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -31,6 +31,7 @@
 #include "verifier_helper_restricted.skel.h"
 #include "verifier_helper_value_access.skel.h"
 #include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
 #include "verifier_jeq_infer_not_null.skel.h"
 #include "verifier_ld_ind.skel.h"
 #include "verifier_ldsx.skel.h"
@@ -46,6 +47,7 @@
 #include "verifier_movsx.skel.h"
 #include "verifier_netfilter_ctx.skel.h"
 #include "verifier_netfilter_retcode.skel.h"
+#include "verifier_precision.skel.h"
 #include "verifier_prevent_map_lookup.skel.h"
 #include "verifier_raw_stack.skel.h"
 #include "verifier_raw_tp_writable.skel.h"
@@ -138,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
 void test_verifier_helper_restricted(void)    { RUN(verifier_helper_restricted); }
 void test_verifier_helper_value_access(void)  { RUN(verifier_helper_value_access); }
 void test_verifier_int_ptr(void)              { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void)  { RUN(verifier_iterating_callbacks); }
 void test_verifier_jeq_infer_not_null(void)   { RUN(verifier_jeq_infer_not_null); }
 void test_verifier_ld_ind(void)               { RUN(verifier_ld_ind); }
 void test_verifier_ldsx(void)                  { RUN(verifier_ldsx); }
@@ -153,6 +156,7 @@ void test_verifier_meta_access(void)          { RUN(verifier_meta_access); }
 void test_verifier_movsx(void)                 { RUN(verifier_movsx); }
 void test_verifier_netfilter_ctx(void)        { RUN(verifier_netfilter_ctx); }
 void test_verifier_netfilter_retcode(void)    { RUN(verifier_netfilter_retcode); }
+void test_verifier_precision(void)            { RUN(verifier_precision); }
 void test_verifier_prevent_map_lookup(void)   { RUN(verifier_prevent_map_lookup); }
 void test_verifier_raw_stack(void)            { RUN(verifier_raw_stack); }
 void test_verifier_raw_tp_writable(void)      { RUN(verifier_raw_tp_writable); }
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
index 72310cfc6474..6fb2217d940b 100644
--- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -16,27 +16,27 @@ static void nsleep()
 
 void test_vmlinux(void)
 {
-	int duration = 0, err;
+	int err;
 	struct test_vmlinux* skel;
 	struct test_vmlinux__bss *bss;
 
 	skel = test_vmlinux__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load"))
 		return;
 	bss = skel->bss;
 
 	err = test_vmlinux__attach(skel);
-	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "test_vmlinux__attach"))
 		goto cleanup;
 
 	/* trigger everything */
 	nsleep();
 
-	CHECK(!bss->tp_called, "tp", "not called\n");
-	CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
-	CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
-	CHECK(!bss->kprobe_called, "kprobe", "not called\n");
-	CHECK(!bss->fentry_called, "fentry", "not called\n");
+	ASSERT_TRUE(bss->tp_called, "tp");
+	ASSERT_TRUE(bss->raw_tp_called, "raw_tp");
+	ASSERT_TRUE(bss->tp_btf_called, "tp_btf");
+	ASSERT_TRUE(bss->kprobe_called, "kprobe");
+	ASSERT_TRUE(bss->fentry_called, "fentry");
 
 cleanup:
 	test_vmlinux__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index f2b8167b72a8..442f4ca39fd7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx)
 	return 0;
 }
 
+int num_user_stacks = 0;
+
 SEC("iter/task")
 int get_task_user_stacks(struct bpf_iter__task *ctx)
 {
@@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx)
 	if (res <= 0)
 		return 0;
 
+	/* Only one task, the current one, should succeed */
+	++num_user_stacks;
+
 	buf_sz += res;
 
 	/* If the verifier doesn't refine bpf_get_task_stack res, and instead
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index 96131b9a1caa..96131b9a1caa 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
index 4ce76eb064c4..d461746fd3c1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
 	return 0;
 }
 
+static int outer_loop(__u32 index, void *data)
+{
+	bpf_loop(nr_loops, empty_callback, NULL, 0);
+	__sync_add_and_fetch(&hits, nr_loops);
+	return 0;
+}
+
 SEC("fentry/" SYS_PREFIX "sys_getpgid")
 int benchmark(void *ctx)
 {
-	for (int i = 0; i < 1000; i++) {
-		bpf_loop(nr_loops, empty_callback, NULL, 0);
-
-		__sync_add_and_fetch(&hits, nr_loops);
-	}
+	bpf_loop(1000, outer_loop, NULL, 0);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 76d661b20e87..56c764df8196 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
 	if (!p)
 		return 0;
 	bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+	bpf_kfunc_call_test_release(p);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index e1e5c54a6a11..49efaed143fc 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,48 +18,48 @@
 		return *(u64 *)num;					\
 	}
 
-__msg(": R0_w=-2147483648 R10=fp0")
+__msg(": R0_w=0xffffffff80000000 R10=fp0")
 check_assert(s64, eq, int_min, INT_MIN);
-__msg(": R0_w=2147483647 R10=fp0")
+__msg(": R0_w=0x7fffffff R10=fp0")
 check_assert(s64, eq, int_max, INT_MAX);
 __msg(": R0_w=0 R10=fp0")
 check_assert(s64, eq, zero, 0);
-__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0")
 check_assert(s64, eq, llong_min, LLONG_MIN);
-__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0")
 check_assert(s64, eq, llong_max, LLONG_MAX);
 
-__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0")
 check_assert(s64, lt, pos, INT_MAX);
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, lt, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, lt, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0")
 check_assert(s64, le, pos, INT_MAX);
 __msg(": R0_w=scalar(smax=0) R10=fp0")
 check_assert(s64, le, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
 check_assert(s64, le, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, gt, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, gt, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0")
 check_assert(s64, gt, neg, INT_MIN);
 
-__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
 check_assert(s64, ge, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
 check_assert(s64, ge, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0")
 check_assert(s64, ge, neg, INT_MIN);
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
 int check_assert_range_s64(struct __sk_buff *ctx)
 {
 	struct bpf_sock *sk = ctx->sk;
@@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx)
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
 int check_assert_range_u64(struct __sk_buff *ctx)
 {
 	u64 num = ctx->len;
@@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx)
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
 int check_assert_single_range_s64(struct __sk_buff *ctx)
 {
 	struct bpf_sock *sk = ctx->sk;
@@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx)
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R1=ctx() R2=4096 R10=fp0")
 int check_assert_single_range_u64(struct __sk_buff *ctx)
 {
 	u64 num = ctx->len;
@@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx)
 
 SEC("?tc")
 __log_level(2) __failure
-__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0")
+__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
 int check_assert_generic(struct __sk_buff *ctx)
 {
 	u8 *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index 4c39e920dac2..8c0ef2742208 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
 		return 0;
 	bpf_spin_lock(&lock);
 	bpf_rbtree_add(&rbtree, &f->node, rbless);
+	bpf_spin_unlock(&lock);
 	return 0;
 }
 
@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
 	if (!f)
 		return 0;
 	bpf_loop(5, subprog_cb_ref, NULL, 0);
+	bpf_obj_drop(f);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 6b9b3c56f009..b2181f850d3e 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -14,6 +14,13 @@ int my_pid;
 int arr[256];
 int small_arr[16] SEC(".data.small_arr");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 10);
+	__type(key, int);
+	__type(value, int);
+} amap SEC(".maps");
+
 #ifdef REAL_TEST
 #define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
 #else
@@ -716,4 +723,714 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx)
 	return 0;
 }
 
+SEC("?raw_tp")
+__failure
+__msg("R1 type=scalar expected=fp")
+__naked int delayed_read_mark(void)
+{
+	/* This is equivalent to C program below.
+	 * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
+	 * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
+	 * At this point iterator next() call is reached with r7 that has no read mark.
+	 * Loop body with r7=0xdead would only be visited if verifier would decide to continue
+	 * with second loop iteration. Absence of read mark on r7 might affect state
+	 * equivalent logic used for iterator convergence tracking.
+	 *
+	 * r7 = &fp[-16]
+	 * fp[-16] = 0
+	 * r6 = bpf_get_prandom_u32()
+	 * bpf_iter_num_new(&fp[-8], 0, 10)
+	 * while (bpf_iter_num_next(&fp[-8])) {
+	 *   r6++
+	 *   if (r6 != 42) {
+	 *     r7 = 0xdead
+	 *     continue;
+	 *   }
+	 *   bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
+	 * }
+	 * bpf_iter_num_destroy(&fp[-8])
+	 * return 0
+	 */
+	asm volatile (
+		"r7 = r10;"
+		"r7 += -16;"
+		"r0 = 0;"
+		"*(u64 *)(r7 + 0) = r0;"
+		"call %[bpf_get_prandom_u32];"
+		"r6 = r0;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"1:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto 2f;"
+		"r6 += 1;"
+		"if r6 != 42 goto 3f;"
+		"r7 = 0xdead;"
+		"goto 1b;"
+	"3:"
+		"r1 = r7;"
+		"r2 = 8;"
+		"r3 = 0xdeadbeef;"
+		"call %[bpf_probe_read_user];"
+		"goto 1b;"
+	"2:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_get_prandom_u32),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy),
+		  __imm(bpf_probe_read_user)
+		: __clobber_all
+	);
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__naked int delayed_precision_mark(void)
+{
+	/* This is equivalent to C program below.
+	 * The test is similar to delayed_iter_mark but verifies that incomplete
+	 * precision don't fool verifier.
+	 * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
+	 * State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
+	 * At this point iterator next() call is reached with r7 that has no read
+	 * and precision marks.
+	 * Loop body with r7=-32 would only be visited if verifier would decide to continue
+	 * with second loop iteration. Absence of precision mark on r7 might affect state
+	 * equivalent logic used for iterator convergence tracking.
+	 *
+	 * r8 = 0
+	 * fp[-16] = 0
+	 * r7 = -16
+	 * r6 = bpf_get_prandom_u32()
+	 * bpf_iter_num_new(&fp[-8], 0, 10)
+	 * while (bpf_iter_num_next(&fp[-8])) {
+	 *   if (r6 != 42) {
+	 *     r7 = -32
+	 *     r6 = bpf_get_prandom_u32()
+	 *     continue;
+	 *   }
+	 *   r0 = r10
+	 *   r0 += r7
+	 *   r8 = *(u64 *)(r0 + 0)           // this is not safe
+	 *   r6 = bpf_get_prandom_u32()
+	 * }
+	 * bpf_iter_num_destroy(&fp[-8])
+	 * return r8
+	 */
+	asm volatile (
+		"r8 = 0;"
+		"*(u64 *)(r10 - 16) = r8;"
+		"r7 = -16;"
+		"call %[bpf_get_prandom_u32];"
+		"r6 = r0;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"1:"
+		"r1 = r10;"
+		"r1 += -8;\n"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto 2f;"
+		"if r6 != 42 goto 3f;"
+		"r7 = -32;"
+		"call %[bpf_get_prandom_u32];"
+		"r6 = r0;"
+		"goto 1b;\n"
+	"3:"
+		"r0 = r10;"
+		"r0 += r7;"
+		"r8 = *(u64 *)(r0 + 0);"
+		"call %[bpf_get_prandom_u32];"
+		"r6 = r0;"
+		"goto 1b;\n"
+	"2:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = r8;"
+		"exit;"
+		:
+		: __imm(bpf_get_prandom_u32),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy),
+		  __imm(bpf_probe_read_user)
+		: __clobber_all
+	);
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps1(void)
+{
+	/* This is equivalent to C program below.
+	 *
+	 * The case turns out to be tricky in a sense that:
+	 * - states with c=-25 are explored only on a second iteration
+	 *   of the outer loop;
+	 * - states with read+precise mark on c are explored only on
+	 *   second iteration of the inner loop and in a state which
+	 *   is pushed to states stack first.
+	 *
+	 * Depending on the details of iterator convergence logic
+	 * verifier might stop states traversal too early and miss
+	 * unsafe c=-25 memory access.
+	 *
+	 *   j = iter_new();		 // fp[-16]
+	 *   a = 0;			 // r6
+	 *   b = 0;			 // r7
+	 *   c = -24;			 // r8
+	 *   while (iter_next(j)) {
+	 *     i = iter_new();		 // fp[-8]
+	 *     a = 0;			 // r6
+	 *     b = 0;			 // r7
+	 *     while (iter_next(i)) {
+	 *	 if (a == 1) {
+	 *	   a = 0;
+	 *	   b = 1;
+	 *	 } else if (a == 0) {
+	 *	   a = 1;
+	 *	   if (random() == 42)
+	 *	     continue;
+	 *	   if (b == 1) {
+	 *	     *(r10 + c) = 7;  // this is not safe
+	 *	     iter_destroy(i);
+	 *	     iter_destroy(j);
+	 *	     return;
+	 *	   }
+	 *	 }
+	 *     }
+	 *     iter_destroy(i);
+	 *     a = 0;
+	 *     b = 0;
+	 *     c = -25;
+	 *   }
+	 *   iter_destroy(j);
+	 *   return;
+	 */
+	asm volatile (
+		"r1 = r10;"
+		"r1 += -16;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r6 = 0;"
+		"r7 = 0;"
+		"r8 = -24;"
+	"j_loop_%=:"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto j_loop_end_%=;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r6 = 0;"
+		"r7 = 0;"
+	"i_loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto i_loop_end_%=;"
+	"check_one_r6_%=:"
+		"if r6 != 1 goto check_zero_r6_%=;"
+		"r6 = 0;"
+		"r7 = 1;"
+		"goto i_loop_%=;"
+	"check_zero_r6_%=:"
+		"if r6 != 0 goto i_loop_%=;"
+		"r6 = 1;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto check_one_r7_%=;"
+		"goto i_loop_%=;"
+	"check_one_r7_%=:"
+		"if r7 != 1 goto i_loop_%=;"
+		"r0 = r10;"
+		"r0 += r8;"
+		"r1 = 7;"
+		"*(u64 *)(r0 + 0) = r1;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+	"i_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r6 = 0;"
+		"r7 = 0;"
+		"r8 = -25;"
+		"goto j_loop_%=;"
+	"j_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_get_prandom_u32),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy)
+		: __clobber_all
+	);
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps2(void)
+{
+	/* This is equivalent to C program below.
+	 *
+	 * The case turns out to be tricky in a sense that:
+	 * - states with read+precise mark on c are explored only on a second
+	 *   iteration of the first inner loop and in a state which is pushed to
+	 *   states stack first.
+	 * - states with c=-25 are explored only on a second iteration of the
+	 *   second inner loop and in a state which is pushed to states stack
+	 *   first.
+	 *
+	 * Depending on the details of iterator convergence logic
+	 * verifier might stop states traversal too early and miss
+	 * unsafe c=-25 memory access.
+	 *
+	 *   j = iter_new();             // fp[-16]
+	 *   a = 0;                      // r6
+	 *   b = 0;                      // r7
+	 *   c = -24;                    // r8
+	 *   while (iter_next(j)) {
+	 *     i = iter_new();           // fp[-8]
+	 *     a = 0;                    // r6
+	 *     b = 0;                    // r7
+	 *     while (iter_next(i)) {
+	 *       if (a == 1) {
+	 *         a = 0;
+	 *         b = 1;
+	 *       } else if (a == 0) {
+	 *         a = 1;
+	 *         if (random() == 42)
+	 *           continue;
+	 *         if (b == 1) {
+	 *           *(r10 + c) = 7;     // this is not safe
+	 *           iter_destroy(i);
+	 *           iter_destroy(j);
+	 *           return;
+	 *         }
+	 *       }
+	 *     }
+	 *     iter_destroy(i);
+	 *     i = iter_new();           // fp[-8]
+	 *     a = 0;                    // r6
+	 *     b = 0;                    // r7
+	 *     while (iter_next(i)) {
+	 *       if (a == 1) {
+	 *         a = 0;
+	 *         b = 1;
+	 *       } else if (a == 0) {
+	 *         a = 1;
+	 *         if (random() == 42)
+	 *           continue;
+	 *         if (b == 1) {
+	 *           a = 0;
+	 *           c = -25;
+	 *         }
+	 *       }
+	 *     }
+	 *     iter_destroy(i);
+	 *   }
+	 *   iter_destroy(j);
+	 *   return;
+	 */
+	asm volatile (
+		"r1 = r10;"
+		"r1 += -16;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r6 = 0;"
+		"r7 = 0;"
+		"r8 = -24;"
+	"j_loop_%=:"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto j_loop_end_%=;"
+
+		/* first inner loop */
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r6 = 0;"
+		"r7 = 0;"
+	"i_loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto i_loop_end_%=;"
+	"check_one_r6_%=:"
+		"if r6 != 1 goto check_zero_r6_%=;"
+		"r6 = 0;"
+		"r7 = 1;"
+		"goto i_loop_%=;"
+	"check_zero_r6_%=:"
+		"if r6 != 0 goto i_loop_%=;"
+		"r6 = 1;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto check_one_r7_%=;"
+		"goto i_loop_%=;"
+	"check_one_r7_%=:"
+		"if r7 != 1 goto i_loop_%=;"
+		"r0 = r10;"
+		"r0 += r8;"
+		"r1 = 7;"
+		"*(u64 *)(r0 + 0) = r1;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+	"i_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+
+		/* second inner loop */
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r6 = 0;"
+		"r7 = 0;"
+	"i2_loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto i2_loop_end_%=;"
+	"check2_one_r6_%=:"
+		"if r6 != 1 goto check2_zero_r6_%=;"
+		"r6 = 0;"
+		"r7 = 1;"
+		"goto i2_loop_%=;"
+	"check2_zero_r6_%=:"
+		"if r6 != 0 goto i2_loop_%=;"
+		"r6 = 1;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto check2_one_r7_%=;"
+		"goto i2_loop_%=;"
+	"check2_one_r7_%=:"
+		"if r7 != 1 goto i2_loop_%=;"
+		"r6 = 0;"
+		"r8 = -25;"
+		"goto i2_loop_%=;"
+	"i2_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+
+		"r6 = 0;"
+		"r7 = 0;"
+		"goto j_loop_%=;"
+	"j_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -16;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_get_prandom_u32),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy)
+		: __clobber_all
+	);
+}
+
+SEC("?raw_tp")
+__success
+__naked int triple_continue(void)
+{
+	/* This is equivalent to C program below.
+	 * High branching factor of the loop body turned out to be
+	 * problematic for one of the iterator convergence tracking
+	 * algorithms explored.
+	 *
+	 * r6 = bpf_get_prandom_u32()
+	 * bpf_iter_num_new(&fp[-8], 0, 10)
+	 * while (bpf_iter_num_next(&fp[-8])) {
+	 *   if (bpf_get_prandom_u32() != 42)
+	 *     continue;
+	 *   if (bpf_get_prandom_u32() != 42)
+	 *     continue;
+	 *   if (bpf_get_prandom_u32() != 42)
+	 *     continue;
+	 *   r0 += 0;
+	 * }
+	 * bpf_iter_num_destroy(&fp[-8])
+	 * return 0
+	 */
+	asm volatile (
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto loop_end_%=;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto loop_%=;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto loop_%=;"
+		"call %[bpf_get_prandom_u32];"
+		"if r0 != 42 goto loop_%=;"
+		"r0 += 0;"
+		"goto loop_%=;"
+	"loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_get_prandom_u32),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy)
+		: __clobber_all
+	);
+}
+
+SEC("?raw_tp")
+__success
+__naked int widen_spill(void)
+{
+	/* This is equivalent to C program below.
+	 * The counter is stored in fp[-16], if this counter is not widened
+	 * verifier states representing loop iterations would never converge.
+	 *
+	 * fp[-16] = 0
+	 * bpf_iter_num_new(&fp[-8], 0, 10)
+	 * while (bpf_iter_num_next(&fp[-8])) {
+	 *   r0 = fp[-16];
+	 *   r0 += 1;
+	 *   fp[-16] = r0;
+	 * }
+	 * bpf_iter_num_destroy(&fp[-8])
+	 * return 0
+	 */
+	asm volatile (
+		"r0 = 0;"
+		"*(u64 *)(r10 - 16) = r0;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto loop_end_%=;"
+		"r0 = *(u64 *)(r10 - 16);"
+		"r0 += 1;"
+		"*(u64 *)(r10 - 16) = r0;"
+		"goto loop_%=;"
+	"loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy)
+		: __clobber_all
+	);
+}
+
+SEC("raw_tp")
+__success
+__naked int checkpoint_states_deletion(void)
+{
+	/* This is equivalent to C program below.
+	 *
+	 *   int *a, *b, *c, *d, *e, *f;
+	 *   int i, sum = 0;
+	 *   bpf_for(i, 0, 10) {
+	 *     a = bpf_map_lookup_elem(&amap, &i);
+	 *     b = bpf_map_lookup_elem(&amap, &i);
+	 *     c = bpf_map_lookup_elem(&amap, &i);
+	 *     d = bpf_map_lookup_elem(&amap, &i);
+	 *     e = bpf_map_lookup_elem(&amap, &i);
+	 *     f = bpf_map_lookup_elem(&amap, &i);
+	 *     if (a) sum += 1;
+	 *     if (b) sum += 1;
+	 *     if (c) sum += 1;
+	 *     if (d) sum += 1;
+	 *     if (e) sum += 1;
+	 *     if (f) sum += 1;
+	 *   }
+	 *   return 0;
+	 *
+	 * The body of the loop spawns multiple simulation paths
+	 * with different combination of NULL/non-NULL information for a/b/c/d/e/f.
+	 * Each combination is unique from states_equal() point of view.
+	 * Explored states checkpoint is created after each iterator next call.
+	 * Iterator convergence logic expects that eventually current state
+	 * would get equal to one of the explored states and thus loop
+	 * exploration would be finished (at-least for a specific path).
+	 * Verifier evicts explored states with high miss to hit ratio
+	 * to to avoid comparing current state with too many explored
+	 * states per instruction.
+	 * This test is designed to "stress test" eviction policy defined using formula:
+	 *
+	 *    sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
+	 *
+	 * Currently N is set to 64, which allows for 6 variables in this test.
+	 */
+	asm volatile (
+		"r6 = 0;"                  /* a */
+		"r7 = 0;"                  /* b */
+		"r8 = 0;"                  /* c */
+		"*(u64 *)(r10 - 24) = r6;" /* d */
+		"*(u64 *)(r10 - 32) = r6;" /* e */
+		"*(u64 *)(r10 - 40) = r6;" /* f */
+		"r9 = 0;"                  /* sum */
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+	"loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto loop_end_%=;"
+
+		"*(u64 *)(r10 - 16) = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"r6 = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"r7 = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"r8 = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"*(u64 *)(r10 - 24) = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"*(u64 *)(r10 - 32) = r0;"
+
+		"r1 = %[amap] ll;"
+		"r2 = r10;"
+		"r2 += -16;"
+		"call %[bpf_map_lookup_elem];"
+		"*(u64 *)(r10 - 40) = r0;"
+
+		"if r6 == 0 goto +1;"
+		"r9 += 1;"
+		"if r7 == 0 goto +1;"
+		"r9 += 1;"
+		"if r8 == 0 goto +1;"
+		"r9 += 1;"
+		"r0 = *(u64 *)(r10 - 24);"
+		"if r0 == 0 goto +1;"
+		"r9 += 1;"
+		"r0 = *(u64 *)(r10 - 32);"
+		"if r0 == 0 goto +1;"
+		"r9 += 1;"
+		"r0 = *(u64 *)(r10 - 40);"
+		"if r0 == 0 goto +1;"
+		"r9 += 1;"
+
+		"goto loop_%=;"
+	"loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+		:
+		: __imm(bpf_map_lookup_elem),
+		  __imm(bpf_iter_num_new),
+		  __imm(bpf_iter_num_next),
+		  __imm(bpf_iter_num_destroy),
+		  __imm_addr(amap)
+		: __clobber_all
+	);
+}
+
+struct {
+	int data[32];
+	int n;
+} loop_data;
+
+SEC("raw_tp")
+__success
+int iter_arr_with_actual_elem_count(const void *ctx)
+{
+	int i, n = loop_data.n, sum = 0;
+
+	if (n > ARRAY_SIZE(loop_data.data))
+		return 0;
+
+	bpf_for(i, 0, n) {
+		/* no rechecking of i against ARRAY_SIZE(loop_data.n) */
+		sum += loop_data.data[i];
+	}
+
+	return sum;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
new file mode 100644
index 000000000000..ec1f6c2f590b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+u64 root_cg_id, leaf_cg_id;
+u64 first_cg_id, last_cg_id;
+
+int pre_order_cnt, post_order_cnt, tree_high;
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_css_for_each(const void *ctx)
+{
+	struct task_struct *cur_task = bpf_get_current_task_btf();
+	struct cgroup_subsys_state *root_css, *leaf_css, *pos;
+	struct cgroup *root_cgrp, *leaf_cgrp, *cur_cgrp;
+
+	if (cur_task->pid != target_pid)
+		return 0;
+
+	root_cgrp = bpf_cgroup_from_id(root_cg_id);
+
+	if (!root_cgrp)
+		return 0;
+
+	leaf_cgrp = bpf_cgroup_from_id(leaf_cg_id);
+
+	if (!leaf_cgrp) {
+		bpf_cgroup_release(root_cgrp);
+		return 0;
+	}
+	root_css = &root_cgrp->self;
+	leaf_css = &leaf_cgrp->self;
+	pre_order_cnt = post_order_cnt = tree_high = 0;
+	first_cg_id = last_cg_id = 0;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+		cur_cgrp = pos->cgroup;
+		post_order_cnt++;
+		last_cg_id = cur_cgrp->kn->id;
+	}
+
+	bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_PRE) {
+		cur_cgrp = pos->cgroup;
+		pre_order_cnt++;
+		if (!first_cg_id)
+			first_cg_id = cur_cgrp->kn->id;
+	}
+
+	bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+		tree_high++;
+
+	bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+		tree_high--;
+	bpf_rcu_read_unlock();
+	bpf_cgroup_release(root_cgrp);
+	bpf_cgroup_release(leaf_cgrp);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
new file mode 100644
index 000000000000..9ac758649cb8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+
+pid_t target_pid;
+int css_task_cnt;
+u64 cg_id;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
+	    unsigned long reqprot, unsigned long prot, int ret)
+{
+	struct task_struct *cur_task = bpf_get_current_task_btf();
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+	struct cgroup *cgrp;
+
+	if (cur_task->pid != target_pid)
+		return ret;
+
+	cgrp = bpf_cgroup_from_id(cg_id);
+
+	if (!cgrp)
+		return -EPERM;
+
+	css = &cgrp->self;
+	css_task_cnt = 0;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS)
+		if (task->pid == target_pid)
+			css_task_cnt++;
+
+	bpf_cgroup_release(cgrp);
+
+	return -EPERM;
+}
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct cgroup *cgrp = ctx->cgroup;
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	/* epilogue */
+	if (cgrp == NULL) {
+		BPF_SEQ_PRINTF(seq, "epilogue\n");
+		return 0;
+	}
+
+	/* prologue */
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "prologue\n");
+
+	BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+	css = &cgrp->self;
+	css_task_cnt = 0;
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+		if (task->pid == target_pid)
+			css_task_cnt++;
+	}
+
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+	u64 cgrp_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	if (cgrp == NULL)
+		return 0;
+	css = &cgrp->self;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c
new file mode 100644
index 000000000000..c9b4055cd410
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+int procs_cnt, threads_cnt, proc_threads_cnt;
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_task_for_each_sleep(void *ctx)
+{
+	struct task_struct *cur_task = bpf_get_current_task_btf();
+	struct task_struct *pos;
+
+	if (cur_task->pid != target_pid)
+		return 0;
+	procs_cnt = threads_cnt = proc_threads_cnt = 0;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
+		if (pos->pid == target_pid)
+			procs_cnt++;
+
+	bpf_for_each(task, pos, cur_task, BPF_TASK_ITER_PROC_THREADS)
+		proc_threads_cnt++;
+
+	bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_THREADS)
+		if (pos->tgid == target_pid)
+			threads_cnt++;
+	bpf_rcu_read_unlock();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
new file mode 100644
index 000000000000..6b1588d70652
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_without_lock)
+{
+	struct task_struct *pos;
+
+	bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+
+	}
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_without_lock)
+{
+	u64 cg_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+	struct cgroup_subsys_state *root_css, *pos;
+
+	if (!cgrp)
+		return 0;
+	root_css = &cgrp->self;
+
+	bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_lock_and_unlock)
+{
+	struct task_struct *pos;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+		bpf_rcu_read_unlock();
+
+		bpf_rcu_read_lock();
+	}
+	bpf_rcu_read_unlock();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_lock_and_unlock)
+{
+	u64 cg_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+	struct cgroup_subsys_state *root_css, *pos;
+
+	if (!cgrp)
+		return 0;
+	root_css = &cgrp->self;
+
+	bpf_rcu_read_lock();
+	bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+		bpf_rcu_read_unlock();
+
+		bpf_rcu_read_lock();
+	}
+	bpf_rcu_read_unlock();
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
+int BPF_PROG(iter_css_task_for_each)
+{
+	u64 cg_id = bpf_get_current_cgroup_id();
+	struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+	struct cgroup_subsys_state *css;
+	struct task_struct *task;
+
+	if (cgrp == NULL)
+		return 0;
+	css = &cgrp->self;
+
+	bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+	}
+	bpf_cgroup_release(cgrp);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
index 44edecfdfaee..e085a51d153e 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -30,6 +30,7 @@ int iter_task_vma_for_each(const void *ctx)
 	bpf_for_each(task_vma, vma, task, 0) {
 		if (seen >= 1000)
 			break;
+		barrier_var(seen);
 
 		vm_ranges[seen].vm_start = vma->vm_start;
 		vm_ranges[seen].vm_end = vma->vm_end;
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index f4c63daba229..6438982b928b 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -591,7 +591,9 @@ int pop_ptr_off(void *(*op)(void *head))
 	n = op(&p->head);
 	bpf_spin_unlock(&p->lock);
 
-	bpf_this_cpu_ptr(n);
+	if (!n)
+		return 0;
+	bpf_spin_lock((void *)n);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b567a666d2b8..1769fdff6aea 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -14,6 +14,24 @@ struct node_data {
 	struct bpf_rb_node node;
 };
 
+struct refcounted_node {
+	long data;
+	struct bpf_rb_node rb_node;
+	struct bpf_refcount refcount;
+};
+
+struct stash {
+	struct bpf_spin_lock l;
+	struct refcounted_node __kptr *stashed;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct stash);
+	__uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
 struct plain_local {
 	long key;
 	long data;
@@ -38,6 +56,7 @@ struct map_value {
  * Had to do the same w/ bpf_kfunc_call_test_release below
  */
 struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx)
 	return 0;
 }
 
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+	struct refcounted_node *p;
+	struct stash *s;
+	int ret = 0;
+
+	s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+	if (!s)
+		return 1;
+
+	if (!s->stashed)
+		/* refcount_acquire failure is expected when no refcounted_node
+		 * has been stashed before this program executes
+		 */
+		return 2;
+
+	p = bpf_refcount_acquire(s->stashed);
+	if (!p)
+		return 3;
+
+	ret = s->stashed ? s->stashed->data : -1;
+	bpf_obj_drop(p);
+	return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+	struct refcounted_node *p;
+	struct stash *s;
+	int key = 0;
+
+	s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+	if (!s)
+		return 1;
+
+	p = bpf_obj_new(typeof(*p));
+	if (!p)
+		return 2;
+	p->data = 42;
+
+	p = bpf_kptr_xchg(&s->stashed, p);
+	if (p) {
+		bpf_obj_drop(p);
+		return 3;
+	}
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
index c39f559d3100..42c4a8b62e36 100644
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -1,4 +1,26 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #define STACK_MAX_LEN 180
+
+/* llvm upstream commit at clang18
+ *   https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e
+ * changed inlining behavior and caused compilation failure as some branch
+ * target distance exceeded 16bit representation which is the maximum for
+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18
+ * to specify which cpu version is used for compilation. So a smaller
+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which
+ * reduced some branch target distances and resolved the compilation failure.
+ *
+ * To capture the case where a developer/ci uses clang18 but the corresponding
+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count
+ * will be set as well to prevent potential compilation failures.
+ */
+#ifdef __BPF_CPU_VERSION__
+#if __BPF_CPU_VERSION__ < 4
+#define UNROLL_COUNT 90
+#endif
+#elif __clang_major__ == 18
+#define UNROLL_COUNT 90
+#endif
+
 #include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1ef07f6ee580..1553b9c16aa7 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
 }
 
 SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+	struct node_acquire *n, *m;
+
+	n = bpf_obj_new(typeof(*n));
+	/* Intentionally not testing !n
+	 * it's MAYBE_NULL for refcount_acquire
+	 */
+	m = bpf_refcount_acquire(n);
+	if (m)
+		bpf_obj_drop(m);
+	if (n)
+		bpf_obj_drop(n);
+
+	return 0;
+}
+
+SEC("?tc")
 __failure __msg("Unreleased reference id=3 alloc_insn=9")
 long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index e02cfd380746..40df2cc26eaf 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -24,9 +24,11 @@ struct task_struct {};
 #define STACK_TABLE_EPOCH_SHIFT 20
 #define STROBE_MAX_STR_LEN 1
 #define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP					\
+	((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
 #define STROBE_MAX_PAYLOAD						\
 	(STROBE_MAX_STRS * STROBE_MAX_STR_LEN +				\
-	STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+	 STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
 
 struct strobe_value_header {
 	/*
@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 					     size_t idx, void *tls_base,
 					     struct strobe_value_generic *value,
 					     struct strobemeta_payload *data,
-					     void *payload)
+					     size_t off)
 {
 	void *location;
 	uint64_t len;
@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 		return 0;
 
 	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
-	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+	len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
 	/*
 	 * if bpf_probe_read_user_str returns error (<0), due to casting to
 	 * unsinged int, it will become big number, so next check is
@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 		return 0;
 
 	data->str_lens[idx] = len;
-	return len;
+	return off + len;
 }
 
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
-					  size_t idx, void *tls_base,
-					  struct strobe_value_generic *value,
-					  struct strobemeta_payload *data,
-					  void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+					     size_t idx, void *tls_base,
+					     struct strobe_value_generic *value,
+					     struct strobemeta_payload *data,
+					     size_t off)
 {
 	struct strobe_map_descr* descr = &data->map_descrs[idx];
 	struct strobe_map_raw map;
@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 
 	location = calc_location(&cfg->map_locs[idx], tls_base);
 	if (!location)
-		return payload;
+		return off;
 
 	bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
 	if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
-		return payload;
+		return off;
 
 	descr->id = map.id;
 	descr->cnt = map.cnt;
@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 		data->req_meta_valid = 1;
 	}
 
-	len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+	len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
 	if (len <= STROBE_MAX_STR_LEN) {
 		descr->tag_len = len;
-		payload += len;
+		off += len;
 	}
 
 #ifdef NO_UNROLL
@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 			break;
 
 		descr->key_lens[i] = 0;
-		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+		len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
 					      map.entries[i].key);
 		if (len <= STROBE_MAX_STR_LEN) {
 			descr->key_lens[i] = len;
-			payload += len;
+			off += len;
 		}
 		descr->val_lens[i] = 0;
-		len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+		len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
 					      map.entries[i].val);
 		if (len <= STROBE_MAX_STR_LEN) {
 			descr->val_lens[i] = len;
-			payload += len;
+			off += len;
 		}
 	}
 
-	return payload;
+	return off;
 }
 
 #ifdef USE_BPF_LOOP
@@ -455,14 +457,20 @@ struct read_var_ctx {
 	struct strobemeta_payload *data;
 	void *tls_base;
 	struct strobemeta_cfg *cfg;
-	void *payload;
+	size_t payload_off;
 	/* value gets mutated */
 	struct strobe_value_generic *value;
 	enum read_type type;
 };
 
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
 {
+	/* lose precision info for ctx->payload_off, verifier won't track
+	 * double xor, barrier_var() is needed to force clang keep both xors.
+	 */
+	ctx->payload_off ^= index;
+	barrier_var(ctx->payload_off);
+	ctx->payload_off ^= index;
 	switch (ctx->type) {
 	case READ_INT_VAR:
 		if (index >= STROBE_MAX_INTS)
@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
 	case READ_MAP_VAR:
 		if (index >= STROBE_MAX_MAPS)
 			return 1;
-		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
-					    ctx->value, ctx->data, ctx->payload);
+		if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+			return 1;
+		ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+						ctx->value, ctx->data, ctx->payload_off);
 		break;
 	case READ_STR_VAR:
 		if (index >= STROBE_MAX_STRS)
 			return 1;
-		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
-					     ctx->value, ctx->data, ctx->payload);
+		if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+			return 1;
+		ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+						ctx->value, ctx->data, ctx->payload_off);
 		break;
 	}
 	return 0;
@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	struct strobe_value_generic value = {0};
 	struct strobemeta_cfg *cfg;
-	void *tls_base, *payload;
+	size_t payload_off;
+	void *tls_base;
 
 	cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
 	if (!cfg)
@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
 
 	data->int_vals_set_mask = 0;
 	data->req_meta_valid = 0;
-	payload = data->payload;
+	payload_off = 0;
 	/*
 	 * we don't have struct task_struct definition, it should be:
 	 * tls_base = (void *)task->thread.fsbase;
@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
 		.tls_base = tls_base,
 		.value = &value,
 		.data = data,
-		.payload = payload,
+		.payload_off = 0,
 	};
 	int err;
 
@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
 	err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
 	if (err != STROBE_MAX_MAPS)
 		return NULL;
+
+	payload_off = ctx.payload_off;
+	/* this should not really happen, here only to satisfy verifer */
+	if (payload_off > sizeof(data->payload))
+		payload_off = sizeof(data->payload);
 #else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
 	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
-		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+		payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
 	}
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma unroll
 #endif /* NO_UNROLL */
 	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
-		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+		payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
 	}
 #endif /* USE_BPF_LOOP */
 
@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
 	 * return pointer right after end of payload, so it's possible to
 	 * calculate exact amount of useful data that needs to be sent
 	 */
-	return payload;
+	return &data->payload[payload_off];
 }
 
 SEC("raw_tracepoint/kfree_skb")
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
index ecde41ae0fc8..b685a4aba6bd 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -37,10 +37,20 @@ int pid = 0;
 		__type(key, int); \
 		__type(value, struct map_value_##_size); \
 		__uint(max_entries, 128); \
-	} array_##_size SEC(".maps");
+	} array_##_size SEC(".maps")
 
-static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
-					     unsigned int idx)
+#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \
+	struct map_value_percpu_##_size { \
+		struct bin_data_##_size __percpu_kptr * data; \
+	}; \
+	struct { \
+		__uint(type, BPF_MAP_TYPE_ARRAY); \
+		__type(key, int); \
+		__type(value, struct map_value_percpu_##_size); \
+		__uint(max_entries, 128); \
+	} array_percpu_##_size SEC(".maps")
+
+static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx)
 {
 	struct generic_map_value *value;
 	unsigned int i, key;
@@ -65,6 +75,14 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
 			return;
 		}
 	}
+}
+
+static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old;
+
 	for (i = 0; i < batch; i++) {
 		key = i;
 		value = bpf_map_lookup_elem(map, &key);
@@ -81,8 +99,72 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
 	}
 }
 
+static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch,
+					       unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old, *new;
+
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 1;
+			return;
+		}
+		/* per-cpu allocator may not be able to refill in time */
+		new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL);
+		if (!new)
+			continue;
+
+		old = bpf_kptr_xchg(&value->data, new);
+		if (old) {
+			bpf_percpu_obj_drop(old);
+			err = 2;
+			return;
+		}
+	}
+}
+
+static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch,
+					      unsigned int idx)
+{
+	struct generic_map_value *value;
+	unsigned int i, key;
+	void *old;
+
+	for (i = 0; i < batch; i++) {
+		key = i;
+		value = bpf_map_lookup_elem(map, &key);
+		if (!value) {
+			err = 3;
+			return;
+		}
+		old = bpf_kptr_xchg(&value->data, NULL);
+		if (!old)
+			continue;
+		bpf_percpu_obj_drop(old);
+	}
+}
+
+#define CALL_BATCH_ALLOC(size, batch, idx) \
+	batch_alloc((struct bpf_map *)(&array_##size), batch, idx)
+
 #define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
-	batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)
+	do { \
+		batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \
+		batch_free((struct bpf_map *)(&array_##size), batch, idx); \
+	} while (0)
+
+#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \
+	batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx)
+
+#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \
+	do { \
+		batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+		batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+	} while (0)
 
 DEFINE_ARRAY_WITH_KPTR(8);
 DEFINE_ARRAY_WITH_KPTR(16);
@@ -97,8 +179,21 @@ DEFINE_ARRAY_WITH_KPTR(1024);
 DEFINE_ARRAY_WITH_KPTR(2048);
 DEFINE_ARRAY_WITH_KPTR(4096);
 
-SEC("fentry/" SYS_PREFIX "sys_nanosleep")
-int test_bpf_mem_alloc_free(void *ctx)
+/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */
+DEFINE_ARRAY_WITH_PERCPU_KPTR(16);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(32);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(64);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(96);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(128);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(192);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(256);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(512);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(1024);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(2048);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(4096);
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_alloc_free(void *ctx)
 {
 	if ((u32)bpf_get_current_pid_tgid() != pid)
 		return 0;
@@ -121,3 +216,76 @@ int test_bpf_mem_alloc_free(void *ctx)
 
 	return 0;
 }
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_free_through_map_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 8-bytes objects in batch to trigger refilling,
+	 * then free these objects through map free.
+	 */
+	CALL_BATCH_ALLOC(8, 128, 0);
+	CALL_BATCH_ALLOC(16, 128, 1);
+	CALL_BATCH_ALLOC(32, 128, 2);
+	CALL_BATCH_ALLOC(64, 128, 3);
+	CALL_BATCH_ALLOC(96, 128, 4);
+	CALL_BATCH_ALLOC(128, 128, 5);
+	CALL_BATCH_ALLOC(192, 128, 6);
+	CALL_BATCH_ALLOC(256, 128, 7);
+	CALL_BATCH_ALLOC(512, 64, 8);
+	CALL_BATCH_ALLOC(1024, 32, 9);
+	CALL_BATCH_ALLOC(2048, 16, 10);
+	CALL_BATCH_ALLOC(4096, 8, 11);
+
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_percpu_alloc_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+	 * then free 128 16-bytes per-cpu objects in batch to trigger freeing.
+	 */
+	CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1);
+	CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2);
+	CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3);
+	CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4);
+	CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5);
+	CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6);
+	CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7);
+	CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8);
+	CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9);
+	CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10);
+	CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11);
+
+	return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_percpu_free_through_map_free(void *ctx)
+{
+	if ((u32)bpf_get_current_pid_tgid() != pid)
+		return 0;
+
+	/* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+	 * then free these object through map free.
+	 */
+	CALL_BATCH_PERCPU_ALLOC(16, 128, 1);
+	CALL_BATCH_PERCPU_ALLOC(32, 128, 2);
+	CALL_BATCH_PERCPU_ALLOC(64, 128, 3);
+	CALL_BATCH_PERCPU_ALLOC(96, 128, 4);
+	CALL_BATCH_PERCPU_ALLOC(128, 128, 5);
+	CALL_BATCH_PERCPU_ALLOC(192, 128, 6);
+	CALL_BATCH_PERCPU_ALLOC(256, 128, 7);
+	CALL_BATCH_PERCPU_ALLOC(512, 64, 8);
+	CALL_BATCH_PERCPU_ALLOC(1024, 32, 9);
+	CALL_BATCH_PERCPU_ALLOC(2048, 16, 10);
+	CALL_BATCH_PERCPU_ALLOC(4096, 8, 11);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
new file mode 100644
index 000000000000..44628865fe1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+__u32 target_ancestor_level;
+__u64 target_ancestor_cgid;
+int target_pid, target_hid;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static int bpf_link_create_verify(int cmd)
+{
+	struct cgroup *cgrp, *ancestor;
+	struct task_struct *task;
+	int ret = 0;
+
+	if (cmd != BPF_LINK_CREATE)
+		return 0;
+
+	task = bpf_get_current_task_btf();
+
+	/* Then it can run in parallel with others */
+	if (task->pid != target_pid)
+		return 0;
+
+	cgrp = bpf_task_get_cgroup1(task, target_hid);
+	if (!cgrp)
+		return 0;
+
+	/* Refuse it if its cgid or its ancestor's cgid is the target cgid */
+	if (cgrp->kn->id == target_ancestor_cgid)
+		ret = -1;
+
+	ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level);
+	if (!ancestor)
+		goto out;
+
+	if (ancestor->kn->id == target_ancestor_cgid)
+		ret = -1;
+	bpf_cgroup_release(ancestor);
+
+out:
+	bpf_cgroup_release(cgrp);
+	return ret;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	return bpf_link_create_verify(cmd);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	return bpf_link_create_verify(cmd);
+}
+
+SEC("fentry")
+int BPF_PROG(fentry_run)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 3ddcb3777912..2a2a942737d7 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -7,7 +7,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
      (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) ||       \
-     defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
+     defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+     __clang_major__ >= 18
 const volatile int skip = 0;
 #else
 const volatile int skip = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 56cdc0a553f0..7e750309ce27 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -18,7 +18,7 @@ const volatile __u64 cgid;
 int remote_pid;
 
 SEC("tp_btf/task_newtask")
-int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(tp_btf_run, struct task_struct *task, u64 clone_flags)
 {
 	struct cgroup *cgrp = NULL;
 	struct task_struct *acquired;
@@ -48,4 +48,30 @@ out:
 	return 0;
 }
 
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct cgroup *cgrp = NULL;
+	struct task_struct *task;
+	int ret = 0;
+
+	task = bpf_get_current_task_btf();
+	if (local_pid != task->pid)
+		return 0;
+
+	if (cmd != BPF_LINK_CREATE)
+		return 0;
+
+	/* 1 is the root cgroup */
+	cgrp = bpf_cgroup_from_id(1);
+	if (!cgrp)
+		goto out;
+	if (!bpf_task_under_cgroup(task, cgrp))
+		ret = -1;
+	bpf_cgroup_release(cgrp);
+
+out:
+	return ret;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 30e7124c49a1..992400acb957 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -1,7 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2023 Isovalent */
 #include <stdbool.h>
+
 #include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
 char LICENSE[] SEC("license") = "GPL";
@@ -12,10 +16,19 @@ bool seen_tc3;
 bool seen_tc4;
 bool seen_tc5;
 bool seen_tc6;
+bool seen_eth;
 
 SEC("tc/ingress")
 int tc1(struct __sk_buff *skb)
 {
+	struct ethhdr eth = {};
+
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		goto out;
+	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+		goto out;
+	seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+out:
 	seen_tc1 = true;
 	return TCX_NEXT;
 }
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index c5588a14fe2e..ec430b71730b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -965,6 +965,7 @@ l0_%=:	r0 = 0;						\
 SEC("xdp")
 __description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
 __success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
 __naked void crossing_64_bit_signed_boundary_2(void)
 {
 	asm volatile ("					\
@@ -1046,6 +1047,7 @@ l0_%=:	r0 = 0;						\
 SEC("xdp")
 __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
 __success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
 __naked void crossing_32_bit_signed_boundary_2(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 107525fb4a6a..e61755656e8d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
 	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+	defined(__TARGET_ARCH_loongarch)) && \
 	__clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
index df7697b94007..c1f55e1d80a4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_cfg.c
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -97,4 +97,66 @@ l0_%=:	r2 = r0;					\
 "	::: __clobber_all);
 }
 
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+	asm volatile ("					\
+	r9 = 2 ll;					\
+	r3 = 0x20 ll;					\
+	r4 = 0x35 ll;					\
+	r8 = r4;					\
+	goto l1_%=;					\
+l0_%=:	r9 -= r3;					\
+	r9 -= r4;					\
+	r9 -= r8;					\
+l1_%=:	r8 += r4;					\
+	if r8 < 0x64 goto l0_%=;			\
+	r0 = r9;					\
+	exit;						\
+"	::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	if r0 > 0 goto l1_%=;				\
+l0_%=:	r0 = 1;						\
+	goto l0_%=;					\
+l1_%=:	exit;						\
+"	::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+	asm volatile ("					\
+	r0 = r1;					\
+	goto -1;					\
+"	::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+	asm volatile ("					\
+	r0 = 0;						\
+	if r0 > 0 goto l1_%=;				\
+l0_%=:	r0 += 1;					\
+	call never_ending_subprog;			\
+l1_%=:	exit;						\
+"	::: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 9f202eda952f..d1edbcff9a18 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
 	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+	defined(__TARGET_ARCH_loongarch)) && \
 	__clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644
index 000000000000..5905e036e0ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 8);
+	__type(key, __u32);
+	__type(value, __u64);
+} map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+	__uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+	char *buf;
+};
+
+struct num_context {
+	__u64 i;
+	__u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+	if (idx == 0) {
+		ctx->buf = (char *)(0xDEAD);
+		return 0;
+	}
+
+	if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+		return 1;
+
+	return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+	char buf[4];
+	struct buf_context loop_ctx = { .buf = buf };
+
+	bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+	return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+	ctx->i = 0;
+	return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+	struct num_context loop_ctx = { .i = 32 };
+
+	bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+	return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+	++ctx->i;
+	return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+	struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+	bpf_loop(100, widening_cb, &loop_ctx, 0);
+	/* loop_ctx.j is not changed during callback iteration,
+	 * verifier should not apply widening to it.
+	 */
+	return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+	for (;;) {}
+	return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+	struct num_context loop_ctx = { .i = 0 };
+
+	bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+	return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+	switch (ctx->i) {
+	case 0:
+		ctx->i = 1;
+		break;
+	case 1:
+		ctx->i = 32;
+		break;
+	}
+	return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+	return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+	struct num_context loop_ctx = { .i = 0 };
+
+	bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+	return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+	return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+	struct num_context loop_ctx = { .i = 0 };
+
+	bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+	return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+	return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	struct num_context loop_ctx = { .i = 0 };
+
+	bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+	return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+	ctx->i++;
+	return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+	struct num_context ctx = { .i = 0 };
+
+	bpf_loop(1, iter_limit_cb, &ctx, 0);
+	return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+	struct num_context ctx = { .i = 0 };
+
+	bpf_loop(2, iter_limit_cb, &ctx, 0);
+	return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+	ctx->i += 100;
+	return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+	ctx->i += 10;
+	return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+	ctx->i += 1;
+	bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+	bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+	return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+	struct num_context ctx1 = { .i = 0 };
+	struct num_context ctx2 = { .i = 0 };
+	__u64 a, b, c;
+
+	bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+	bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+	a = ctx1.i;
+	b = ctx2.i;
+	/* Force 'ctx1.i' and 'ctx2.i' precise. */
+	c = choice_arr[(a + b) % 2];
+	/* This makes 'c' zero, but neither clang nor verifier know it. */
+	c /= 10;
+	/* Make sure that verifier does not visit 'impossible' states:
+	 * enumerate all possible callback visit masks.
+	 */
+	if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+	    b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+		asm volatile ("r0 /= 0;" ::: "r0");
+	return 1000 * a + b + c;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 375525329637..d4427d8e1217 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
 	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+	defined(__TARGET_ARCH_loongarch)) && \
 	__clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index 5bc86af80a9a..71735dbf33d4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -75,9 +75,10 @@ l0_%=:	r0 += 1;					\
 "	::: __clobber_all);
 }
 
-SEC("tracepoint")
+SEC("socket")
 __description("bounded loop, start in the middle")
-__failure __msg("back-edge")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
 __naked void loop_start_in_the_middle(void)
 {
 	asm volatile ("					\
@@ -136,7 +137,9 @@ l0_%=:	exit;						\
 
 SEC("tracepoint")
 __description("bounded recursion")
-__failure __msg("back-edge")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
 __naked void bounded_recursion(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index b2a04d1179d0..cbb9d6714f53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
 	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+	defined(__TARGET_ARCH_loongarch)) && \
 	__clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..6b564d4c0986
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+	asm volatile (
+		"r2 = 8;"
+		"r2 = -r2;"
+		"if r2 != -8 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = le16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = be16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	__clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+	asm volatile (
+		"r2 = 0;"
+		"r2 = bswap16 r2;"
+		"if r2 != 0 goto 1f;"
+		"r1 = r10;"
+		"r1 += r2;"
+	"1:"
+		"r0 = 0;"
+		"exit;"
+		::: __clobber_all);
+}
+
+#endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1                       ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=:  R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+	asm volatile (
+		"r0 = 0;"
+	"l0_%=:"
+		"r0 += 1;"
+		"r0 += 1;"
+		/* every few iterations we'll have a checkpoint here with
+		 * first_idx == last_idx, potentially confusing precision
+		 * backtracking logic
+		 */
+		"if r0 >= 10 goto l1_%=;"	/* checkpoint + mark_precise */
+		"goto l0_%=;"
+	"l1_%=:"
+		"exit;"
+		::: __clobber_common
+	);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 8fc5174808b2..2a2271cf0294 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -6,7 +6,8 @@
 
 #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
 	(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
-        defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+	defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+	defined(__TARGET_ARCH_loongarch)) && \
 	__clang_major__ >= 18
 
 SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index db6b3143338b..f61d623b1ce8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
 __naked int callback_result_precise(void)
 {
 	asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
 __msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
 __naked int parent_callee_saved_reg_precise_with_callback(void)
 {
 	asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
 
 SEC("?raw_tp")
 __success __log_level(2)
+/* First simulated path does not include callback body */
 __msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
 __msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
 __msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
 __msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
 __msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
 __msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
 __msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
 __msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
 __msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
 __msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
 __naked int parent_stack_slot_precise_with_callback(void)
 {
 	asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index b2dfd7066c6e..f6d1cc9ad892 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -21,7 +21,7 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
 extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
 				    enum xdp_rss_hash_type *rss_type) __ksym;
 
-SEC("xdp")
+SEC("xdp.frags")
 int rx(struct xdp_md *ctx)
 {
 	void *data, *data_meta, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 07d786329105..80f620602d50 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -53,6 +53,8 @@
 #define DEFAULT_TTL 64
 #define MAX_ALLOWED_PORTS 8
 
+#define MAX_PACKET_OFF 0xffff
+
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
 
@@ -177,69 +179,82 @@ static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
 	return ns / (NSEC_PER_SEC / TCP_TS_HZ);
 }
 
-static __always_inline __u32 tcp_time_stamp_raw(void)
+static __always_inline __u32 tcp_clock_ms(void)
 {
 	return tcp_ns_to_ts(tcp_clock_ns());
 }
 
 struct tcpopt_context {
-	__u8 *ptr;
-	__u8 *end;
+	void *data;
 	void *data_end;
 	__be32 *tsecr;
 	__u8 wscale;
 	bool option_timestamp;
 	bool option_sack;
+	__u32 off;
 };
 
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
 {
-	__u8 opcode, opsize;
+	__u64 off = ctx->off;
+	__u8 *data;
 
-	if (ctx->ptr >= ctx->end)
-		return 1;
-	if (ctx->ptr >= ctx->data_end)
-		return 1;
+	/* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+	if (off > MAX_PACKET_OFF - sz)
+		return NULL;
 
-	opcode = ctx->ptr[0];
+	data = ctx->data + off;
+	barrier_var(data);
+	if (data + sz >= ctx->data_end)
+		return NULL;
 
-	if (opcode == TCPOPT_EOL)
-		return 1;
-	if (opcode == TCPOPT_NOP) {
-		++ctx->ptr;
-		return 0;
-	}
+	ctx->off += sz;
+	return data;
+}
 
-	if (ctx->ptr + 1 >= ctx->end)
-		return 1;
-	if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+	__u8 *opcode, *opsize, *wscale, *tsecr;
+	__u32 off = ctx->off;
+
+	opcode = next(ctx, 1);
+	if (!opcode)
 		return 1;
-	opsize = ctx->ptr[1];
-	if (opsize < 2)
+
+	if (*opcode == TCPOPT_EOL)
 		return 1;
+	if (*opcode == TCPOPT_NOP)
+		return 0;
 
-	if (ctx->ptr + opsize > ctx->end)
+	opsize = next(ctx, 1);
+	if (!opsize || *opsize < 2)
 		return 1;
 
-	switch (opcode) {
+	switch (*opcode) {
 	case TCPOPT_WINDOW:
-		if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
-			ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+		wscale = next(ctx, 1);
+		if (!wscale)
+			return 1;
+		if (*opsize == TCPOLEN_WINDOW)
+			ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
 		break;
 	case TCPOPT_TIMESTAMP:
-		if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+		tsecr = next(ctx, 4);
+		if (!tsecr)
+			return 1;
+		if (*opsize == TCPOLEN_TIMESTAMP) {
 			ctx->option_timestamp = true;
 			/* Client's tsval becomes our tsecr. */
-			*ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+			*ctx->tsecr = get_unaligned((__be32 *)tsecr);
 		}
 		break;
 	case TCPOPT_SACK_PERM:
-		if (opsize == TCPOLEN_SACK_PERM)
+		if (*opsize == TCPOLEN_SACK_PERM)
 			ctx->option_sack = true;
 		break;
 	}
 
-	ctx->ptr += opsize;
+	ctx->off = off + *opsize;
 
 	return 0;
 }
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
 
 static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
 					  __u16 tcp_len, __be32 *tsval,
-					  __be32 *tsecr, void *data_end)
+					  __be32 *tsecr, void *data, void *data_end)
 {
 	struct tcpopt_context loop_ctx = {
-		.ptr = (__u8 *)(tcp_header + 1),
-		.end = (__u8 *)tcp_header + tcp_len,
+		.data = data,
 		.data_end = data_end,
 		.tsecr = tsecr,
 		.wscale = TS_OPT_WSCALE_MASK,
 		.option_timestamp = false,
 		.option_sack = false,
+		/* Note: currently verifier would track .off as unbound scalar.
+		 *       In case if verifier would at some point get smarter and
+		 *       compute bounded value for this var, beware that it might
+		 *       hinder bpf_loop() convergence validation.
+		 */
+		.off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
 	};
 	u32 cookie;
 
@@ -274,7 +294,7 @@ static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
 	if (!loop_ctx.option_timestamp)
 		return false;
 
-	cookie = tcp_time_stamp_raw() & ~TSMASK;
+	cookie = tcp_clock_ms() & ~TSMASK;
 	cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
 	if (loop_ctx.option_sack)
 		cookie |= TS_OPT_SACK;
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
 	cookie = (__u32)value;
 
 	if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
-			  &tsopt_buf[0], &tsopt_buf[1], data_end))
+			  &tsopt_buf[0], &tsopt_buf[1], data, data_end))
 		tsopt = tsopt_buf;
 
 	/* Check that there is enough space for a SYNACK. It also covers
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 37ffa57f28a1..a350ecdfba4a 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name)
 	return parse_int(str, val, name);
 }
 
+static void update_flags(int *flags, int flag, bool clear)
+{
+	if (clear)
+		*flags &= ~flag;
+	else
+		*flags |= flag;
+}
+
 /* Uses btf_decl_tag attributes to describe the expected test
  * behavior, see bpf_misc.h for detailed description of each attribute
  * and attribute combinations.
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester,
 	memset(spec, 0, sizeof(*spec));
 
 	spec->prog_name = bpf_program__name(prog);
+	spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */
 
 	btf = bpf_object__btf(obj);
 	if (!btf) {
@@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester,
 	for (i = 1; i < btf__type_cnt(btf); i++) {
 		const char *s, *val, *msg;
 		const struct btf_type *t;
-		int tmp;
+		bool clear;
+		int flags;
 
 		t = btf__type_by_id(btf, i);
 		if (!btf_is_decl_tag(t))
@@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester,
 				goto cleanup;
 		} else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
 			val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+
+			clear = val[0] == '!';
+			if (clear)
+				val++;
+
 			if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
-				spec->prog_flags |= BPF_F_STRICT_ALIGNMENT;
+				update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear);
 			} else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
-				spec->prog_flags |= BPF_F_ANY_ALIGNMENT;
+				update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear);
 			} else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
-				spec->prog_flags |= BPF_F_TEST_RND_HI32;
+				update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear);
 			} else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
-				spec->prog_flags |= BPF_F_TEST_STATE_FREQ;
+				update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear);
 			} else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
-				spec->prog_flags |= BPF_F_SLEEPABLE;
+				update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear);
 			} else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
-				spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+				update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear);
+			} else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) {
+				update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear);
 			} else /* assume numeric value */ {
-				err = parse_int(val, &tmp, "test prog flags");
+				err = parse_int(val, &flags, "test prog flags");
 				if (err)
 					goto cleanup;
-				spec->prog_flags |= tmp;
+				update_flags(&spec->prog_flags, flags, clear);
 			}
 		}
 	}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 7fc00e423e4d..767e0693df10 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,13 +1396,18 @@ static void test_map_stress(void)
 #define MAX_DELAY_US 50000
 #define MIN_DELAY_RANGE_US 5000
 
-static int map_update_retriable(int map_fd, const void *key, const void *value,
-				int flags, int attempts)
+static bool retry_for_again_or_busy(int err)
+{
+	return (err == EAGAIN || err == EBUSY);
+}
+
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+			 retry_for_error_fn need_retry)
 {
 	int delay = rand() % MIN_DELAY_RANGE_US;
 
 	while (bpf_map_update_elem(map_fd, key, value, flags)) {
-		if (!attempts || (errno != EAGAIN && errno != EBUSY))
+		if (!attempts || !need_retry(errno))
 			return -errno;
 
 		if (delay <= MAX_DELAY_US / 2)
@@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data)
 		key = value = i;
 
 		if (do_update) {
-			err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+			err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+						   retry_for_again_or_busy);
 			if (err)
 				printf("error %d %d\n", err, errno);
 			assert(err == 0);
-			err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+			err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+						   retry_for_again_or_busy);
 			if (err)
 				printf("error %d %d\n", err, errno);
 			assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index f6fbca761732..e4ac704a536c 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 
 #define CHECK(condition, tag, format...) ({				\
 	int __ret = !!(condition);					\
@@ -16,4 +17,8 @@
 
 extern int skips;
 
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+			 retry_for_error_fn need_retry);
+
 #endif
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2c89674fc62c..b0068a9d2cfe 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
 
 	bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
 	bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
-	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+	bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
 
 	err = bpf_object__load(obj);
 	if (err) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 98107e0452d3..f36e41435be7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (fixup_skips != skips)
 		return;
 
-	pflags = BPF_F_TEST_RND_HI32;
+	pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
 	if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
 		pflags |= BPF_F_STRICT_ALIGNMENT;
 	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 8d994884c7b4..d2458c1b1671 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
 	if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
 		bpf_program__set_type(prog, type);
 
-	flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
+	flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
 	bpf_program__set_flags(prog, flags);
 
 	err = bpf_object__load(obj);
@@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 {
 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 		.kern_version = kern_version,
-		.prog_flags = BPF_F_TEST_RND_HI32,
+		.prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS,
 		.log_level = extra_prog_load_log_flags,
 		.log_buf = log_buf,
 		.log_size = log_buf_sz,
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 2a6efbd0401e..b6d016461fb0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -4,9 +4,40 @@
 #include <stdlib.h>
 #include <error.h>
 #include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
 
 #include "unpriv_helpers.h"
 
+static bool get_mitigations_off(void)
+{
+	char cmdline[4096], *c;
+	int fd, ret = false;
+
+	fd = open("/proc/cmdline", O_RDONLY);
+	if (fd < 0) {
+		perror("open /proc/cmdline");
+		return false;
+	}
+
+	if (read(fd, cmdline, sizeof(cmdline) - 1) < 0) {
+		perror("read /proc/cmdline");
+		goto out;
+	}
+
+	cmdline[sizeof(cmdline) - 1] = '\0';
+	for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
+		if (strncmp(c, "mitigations=off", strlen(c)))
+			continue;
+		ret = true;
+		break;
+	}
+out:
+	close(fd);
+	return ret;
+}
+
 bool get_unpriv_disabled(void)
 {
 	bool disabled;
@@ -22,5 +53,5 @@ bool get_unpriv_disabled(void)
 		disabled = true;
 	}
 
-	return disabled;
+	return disabled ? true : get_mitigations_off();
 }
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index 3af2501082b2..b616575c3b00 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -65,3 +65,35 @@
 	.expected_attach_type = BPF_SK_LOOKUP,
 	.runs = -1,
 },
+{
+	"BPF_ST_MEM stack imm sign",
+	/* Check if verifier correctly reasons about sign of an
+	 * immediate spilled to stack by BPF_ST instruction.
+	 *
+	 *   fp[-8] = -44;
+	 *   r0 = fp[-8];
+	 *   if r0 s< 0 goto ret0;
+	 *   r0 = -1;
+	 *   exit;
+	 * ret0:
+	 *   r0 = 0;
+	 *   exit;
+	 */
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, -1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	/* Use prog type that requires return value in range [0, 1] */
+	.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+	.expected_attach_type = BPF_SK_LOOKUP,
+	.result = VERBOSE_ACCEPT,
+	.runs = -1,
+	.errstr = "0: (7a) *(u64 *)(r10 -8) = -44        ; R10=fp0 fp-8_w=-44\
+	2: (c5) if r0 s< 0x0 goto pc+2\
+	R0_w=-44",
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 1bdf2b43e49e..3d5cd51071f0 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -442,7 +442,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge from insn 0 to 0",
+	.errstr = "the call stack of 9 frames is too deep",
 	.result = REJECT,
 },
 {
@@ -799,7 +799,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge",
+	.errstr = "the call stack of 9 frames is too deep",
 	.result = REJECT,
 },
 {
@@ -811,7 +811,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
-	.errstr = "back-edge",
+	.errstr = "the call stack of 9 frames is too deep",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index f9297900cea6..78f19c255f20 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -9,8 +9,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "invalid BPF_LD_IMM insn",
-	.errstr_unpriv = "R1 pointer comparison",
+	.errstr = "jump into the middle of ldimm64 insn 1",
+	.errstr_unpriv = "jump into the middle of ldimm64 insn 1",
 	.result = REJECT,
 },
 {
@@ -23,8 +23,8 @@
 	BPF_LD_IMM64(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr = "invalid BPF_LD_IMM insn",
-	.errstr_unpriv = "R1 pointer comparison",
+	.errstr = "jump into the middle of ldimm64 insn 1",
+	.errstr_unpriv = "jump into the middle of ldimm64 insn 1",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 655095810d4a..1d418d66e375 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -18,6 +18,7 @@
 #include <libelf.h>
 #include <gelf.h>
 #include <float.h>
+#include <math.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -99,6 +100,7 @@ struct stat_specs {
 	enum stat_id ids[ALL_STATS_CNT];
 	enum stat_variant variants[ALL_STATS_CNT];
 	bool asc[ALL_STATS_CNT];
+	bool abs[ALL_STATS_CNT];
 	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
 };
 
@@ -133,6 +135,7 @@ struct filter {
 	int stat_id;
 	enum stat_variant stat_var;
 	long value;
+	bool abs;
 };
 
 static struct env {
@@ -142,10 +145,12 @@ static struct env {
 	bool debug;
 	bool quiet;
 	bool force_checkpoints;
+	bool force_reg_invariants;
 	enum resfmt out_fmt;
 	bool show_version;
 	bool comparison_mode;
 	bool replay_mode;
+	int top_n;
 
 	int log_level;
 	int log_size;
@@ -210,8 +215,7 @@ static const struct argp_option opts[] = {
 	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
 	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
 	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
-	{ "test-states", 't', NULL, 0,
-	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
 	{ "quiet", 'q', NULL, 0, "Quiet mode" },
 	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -219,6 +223,10 @@ static const struct argp_option opts[] = {
 	{ "compare", 'C', NULL, 0, "Comparison mode" },
 	{ "replay", 'R', NULL, 0, "Replay mode" },
 	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
+	{ "test-states", 't', NULL, 0,
+	  "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+	{ "test-reg-invariants", 'r', NULL, 0,
+	  "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
 	{},
 };
 
@@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 't':
 		env.force_checkpoints = true;
 		break;
+	case 'r':
+		env.force_reg_invariants = true;
+		break;
+	case 'n':
+		errno = 0;
+		env.top_n = strtol(arg, NULL, 10);
+		if (errno) {
+			fprintf(stderr, "invalid top N specifier: %s\n", arg);
+			argp_usage(state);
+		}
 	case 'C':
 		env.comparison_mode = true;
 		break;
@@ -455,7 +473,8 @@ static struct {
 	{ OP_EQ, "=" },
 };
 
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+			      enum stat_variant *var, bool *is_abs);
 
 static int append_filter(struct filter **filters, int *cnt, const char *str)
 {
@@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
 		long val;
 		const char *end = str;
 		const char *op_str;
+		bool is_abs;
 
 		op_str = operators[i].op_str;
 		p = strstr(str, op_str);
 		if (!p)
 			continue;
 
-		if (!parse_stat_id_var(str, p - str, &id, &var)) {
+		if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
 			fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
 			return -EINVAL;
 		}
@@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
 		f->stat_id = id;
 		f->stat_var = var;
 		f->op = operators[i].op_kind;
+		f->abs = true;
 		f->value = val;
 
 		*cnt += 1;
@@ -657,7 +678,8 @@ static struct stat_def {
 	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
 };
 
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+			      enum stat_variant *var, bool *is_abs)
 {
 	static const char *var_sfxs[] = {
 		[VARIANT_A] = "_a",
@@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v
 	};
 	int i, j, k;
 
+	/* |<stat>| means we take absolute value of given stat */
+	*is_abs = false;
+	if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
+		*is_abs = true;
+		name += 1;
+		len -= 2;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
 		struct stat_def *def = &stat_defs[i];
 		size_t alias_len, sfx_len;
@@ -722,7 +752,7 @@ static bool is_desc_sym(char c)
 static int parse_stat(const char *stat_name, struct stat_specs *specs)
 {
 	int id;
-	bool has_order = false, is_asc = false;
+	bool has_order = false, is_asc = false, is_abs = false;
 	size_t len = strlen(stat_name);
 	enum stat_variant var;
 
@@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
 		len -= 1;
 	}
 
-	if (!parse_stat_id_var(stat_name, len, &id, &var)) {
+	if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
 		fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
 		return -ESRCH;
 	}
@@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
 	specs->ids[specs->spec_cnt] = id;
 	specs->variants[specs->spec_cnt] = var;
 	specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+	specs->abs[specs->spec_cnt] = is_abs;
 	specs->spec_cnt++;
 
 	return 0;
@@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 
 	if (env.force_checkpoints)
 		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+	if (env.force_reg_invariants)
+		bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
 
 	err = bpf_object__load(obj);
 	env.progs_processed++;
@@ -1103,7 +1136,7 @@ cleanup:
 }
 
 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
-		    enum stat_id id, bool asc)
+		    enum stat_id id, bool asc, bool abs)
 {
 	int cmp = 0;
 
@@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
 		long v1 = s1->stats[id];
 		long v2 = s2->stats[id];
 
+		if (abs) {
+			v1 = v1 < 0 ? -v1 : v1;
+			v2 = v2 < 0 ? -v2 : v2;
+		}
+
 		if (v1 != v2)
 			cmp = v1 < v2 ? -1 : 1;
 		break;
@@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2)
 	int i, cmp;
 
 	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
-		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+			       env.sort_spec.asc[i], env.sort_spec.abs[i]);
 		if (cmp != 0)
 			return cmp;
 	}
@@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s,
 
 static int cmp_join_stat(const struct verif_stats_join *s1,
 			 const struct verif_stats_join *s2,
-			 enum stat_id id, enum stat_variant var, bool asc)
+			 enum stat_id id, enum stat_variant var,
+			 bool asc, bool abs)
 {
 	const char *str1 = NULL, *str2 = NULL;
 	double v1, v2;
@@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
 	fetch_join_stat_value(s1, id, var, &str1, &v1);
 	fetch_join_stat_value(s2, id, var, &str2, &v2);
 
+	if (abs) {
+		v1 = fabs(v1);
+		v2 = fabs(v2);
+	}
+
 	if (str1)
 		cmp = strcmp(str1, str2);
 	else if (v1 != v2)
@@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void *v1, const void *v2)
 		cmp = cmp_join_stat(s1, s2,
 				    env.sort_spec.ids[i],
 				    env.sort_spec.variants[i],
-				    env.sort_spec.asc[i]);
+				    env.sort_spec.asc[i],
+				    env.sort_spec.abs[i]);
 		if (cmp != 0)
 			return cmp;
 	}
@@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta
 
 	fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
 
+	if (f->abs)
+		value = fabs(value);
+
 	switch (f->op) {
 	case OP_EQ: return value > f->value - eps && value < f->value + eps;
 	case OP_NEQ: return value < f->value - eps || value > f->value + eps;
@@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void)
 	struct stat_specs base_specs = {}, comp_specs = {};
 	struct stat_specs tmp_sort_spec;
 	enum resfmt cur_fmt;
-	int err, i, j, last_idx;
+	int err, i, j, last_idx, cnt;
 
 	if (env.filename_cnt != 2) {
 		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
@@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void)
 		env.join_stat_cnt += 1;
 	}
 
-	/* now sort joined results accorsing to sort spec */
+	/* now sort joined results according to sort spec */
 	qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
 
 	/* for human-readable table output we need to do extra pass to
@@ -1896,16 +1945,22 @@ one_more_time:
 	output_comp_headers(cur_fmt);
 
 	last_idx = -1;
+	cnt = 0;
 	for (i = 0; i < env.join_stat_cnt; i++) {
 		const struct verif_stats_join *join = &env.join_stats[i];
 
 		if (!should_output_join_stats(join))
 			continue;
 
+		if (env.top_n && cnt >= env.top_n)
+			break;
+
 		if (cur_fmt == RESFMT_TABLE_CALCLEN)
 			last_idx = i;
 
 		output_comp_stats(join, cur_fmt, i == last_idx);
+
+		cnt++;
 	}
 
 	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s
 {
 	long value = stats->stats[f->stat_id];
 
+	if (f->abs)
+		value = value < 0 ? -value : value;
+
 	switch (f->op) {
 	case OP_EQ: return value == f->value;
 	case OP_NEQ: return value != f->value;
@@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats)
 static void output_prog_stats(void)
 {
 	const struct verif_stats *stats;
-	int i, last_stat_idx = 0;
+	int i, last_stat_idx = 0, cnt = 0;
 
 	if (env.out_fmt == RESFMT_TABLE) {
 		/* calculate column widths */
@@ -1984,7 +2042,10 @@ static void output_prog_stats(void)
 		stats = &env.prog_stats[i];
 		if (!should_output_stats(stats))
 			continue;
+		if (env.top_n && cnt >= env.top_n)
+			break;
 		output_stats(stats, env.out_fmt, i == last_stat_idx);
+		cnt++;
 	}
 }
 
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 685034528018..65d14f3bbe30 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs"
 MOUNT_DIR="mnt"
 ROOTFS_IMAGE="root.img"
 OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+	"tools/testing/selftests/bpf/config.vm"
+	"tools/testing/selftests/bpf/config.${ARCH}")
 INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
 NUM_COMPILE_JOBS="$(nproc)"
 LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 17c980138796..c3ba40d0b9de 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -26,6 +26,7 @@
 #include <linux/sockios.h>
 #include <sys/mman.h>
 #include <net/if.h>
+#include <ctype.h>
 #include <poll.h>
 #include <time.h>
 
@@ -47,6 +48,7 @@ struct xsk {
 };
 
 struct xdp_hw_metadata *bpf_obj;
+__u16 bind_flags = XDP_COPY;
 struct xsk *rx_xsk;
 const char *ifname;
 int ifindex;
@@ -60,7 +62,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
 	const struct xsk_socket_config socket_config = {
 		.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-		.bind_flags = XDP_COPY,
+		.bind_flags = bind_flags,
 	};
 	const struct xsk_umem_config umem_config = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -263,11 +265,14 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 			verify_skb_metadata(server_fd);
 
 		for (i = 0; i < rxq; i++) {
+			bool first_seg = true;
+			bool is_eop = true;
+
 			if (fds[i].revents == 0)
 				continue;
 
 			struct xsk *xsk = &rx_xsk[i];
-
+peek:
 			ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
 			printf("xsk_ring_cons__peek: %d\n", ret);
 			if (ret != 1)
@@ -276,12 +281,19 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
 			rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
 			comp_addr = xsk_umem__extract_addr(rx_desc->addr);
 			addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
-			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
-			       xsk, idx, rx_desc->addr, addr, comp_addr);
-			verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
-					    clock_id);
+			is_eop = !(rx_desc->options & XDP_PKT_CONTD);
+			printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
+			       xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
+			if (first_seg) {
+				verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+						    clock_id);
+				first_seg = false;
+			}
+
 			xsk_ring_cons__release(&xsk->rx, 1);
 			refill_rx(xsk, comp_addr);
+			if (!is_eop)
+				goto peek;
 		}
 	}
 
@@ -404,6 +416,53 @@ static void timestamping_enable(int fd, int val)
 		error(1, errno, "setsockopt(SO_TIMESTAMPING)");
 }
 
+static void print_usage(void)
+{
+	const char *usage =
+		"Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
+		"  -m    Enable multi-buffer XDP for larger MTU\n"
+		"  -h    Display this help and exit\n\n"
+		"Generate test packets on the other machine with:\n"
+		"  echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
+
+	printf("%s", usage);
+}
+
+static void read_args(int argc, char *argv[])
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "mh")) != -1) {
+		switch (opt) {
+		case 'm':
+			bind_flags |= XDP_USE_SG;
+			break;
+		case 'h':
+			print_usage();
+			exit(0);
+		case '?':
+			if (isprint(optopt))
+				fprintf(stderr, "Unknown option: -%c\n", optopt);
+			fallthrough;
+		default:
+			print_usage();
+			error(-1, opterr, "Command line options error");
+		}
+	}
+
+	if (optind >= argc) {
+		fprintf(stderr, "No device name provided\n");
+		print_usage();
+		exit(-1);
+	}
+
+	ifname = argv[optind];
+	ifindex = if_nametoindex(ifname);
+
+	if (!ifname)
+		error(-1, errno, "Invalid interface name");
+}
+
 int main(int argc, char *argv[])
 {
 	clockid_t clock_id = CLOCK_TAI;
@@ -413,13 +472,8 @@ int main(int argc, char *argv[])
 
 	struct bpf_program *prog;
 
-	if (argc != 2) {
-		fprintf(stderr, "pass device name\n");
-		return -1;
-	}
+	read_args(argc, argv);
 
-	ifname = argv[1];
-	ifindex = if_nametoindex(ifname);
 	rxq = rxq_num(ifname);
 
 	printf("rxq: %d\n", rxq);
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 591ca9637b23..b604c570309a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
 	struct xdp_info *meta = data - sizeof(struct xdp_info);
 
 	if (meta->count != pkt->pkt_nb) {
-		ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
-			       __func__, pkt->pkt_nb, meta->count);
+		ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+			       __func__, pkt->pkt_nb,
+			       (unsigned long long)meta->count);
 		return false;
 	}
 
@@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp
 
 	if (addr >= umem->num_frames * umem->frame_size ||
 	    addr + len > umem->num_frames * umem->frame_size) {
-		ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+		ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+			       (unsigned long long)addr, len);
 		return false;
 	}
 	if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
-		ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+		ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+			       (unsigned long long)addr, len);
 		return false;
 	}
 
@@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
 
 			ksft_print_msg("[%s] Too many packets completed\n", __func__);
-			ksft_print_msg("Last completion address: %llx\n", addr);
+			ksft_print_msg("Last completion address: %llx\n",
+				       (unsigned long long)addr);
 			return TEST_FAILURE;
 		}
 
@@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
 	}
 
 	if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
-		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-			       __func__, stats.tx_invalid_descs,
+		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+			       __func__,
+			       (unsigned long long)stats.tx_invalid_descs,
 			       ifobject->xsk->pkt_stream->nb_pkts);
 		return TEST_FAILURE;
 	}
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 4804c7dc7b31..b171fd53b004 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -27,7 +27,7 @@ static const char * const dev_files[] = {
 void print_cachestat(struct cachestat *cs)
 {
 	ksft_print_msg(
-	"Using cachestat: Cached: %lu, Dirty: %lu, Writeback: %lu, Evicted: %lu, Recently Evicted: %lu\n",
+	"Using cachestat: Cached: %llu, Dirty: %llu, Writeback: %llu, Evicted: %llu, Recently Evicted: %llu\n",
 	cs->nr_cache, cs->nr_dirty, cs->nr_writeback,
 	cs->nr_evicted, cs->nr_recently_evicted);
 }
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
index 6e9d98d457d5..411ac098308f 100644
--- a/tools/testing/selftests/capabilities/Makefile
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -2,7 +2,7 @@
 TEST_GEN_FILES := validate_cap
 TEST_GEN_PROGS := test_execve
 
-CFLAGS += -O2 -g -std=gnu99 -Wall
+CFLAGS += -O2 -g -std=gnu99 -Wall $(KHDR_INCLUDES)
 LDLIBS += -lcap-ng -lrt -ldl
 
 include ../lib.mk
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index df0ef02b4036..e3a352b020a7 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -20,14 +20,6 @@
 
 #include "../kselftest.h"
 
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT			47
-# define PR_CAP_AMBIENT_IS_SET		1
-# define PR_CAP_AMBIENT_RAISE		2
-# define PR_CAP_AMBIENT_LOWER		3
-# define PR_CAP_AMBIENT_CLEAR_ALL	4
-#endif
-
 static int nerrs;
 static pid_t mpid;	/*  main() pid is used to avoid duplicate test counts */
 
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index cdfc94268fe6..60b4e7b716a7 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -9,14 +9,6 @@
 
 #include "../kselftest.h"
 
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT			47
-# define PR_CAP_AMBIENT_IS_SET		1
-# define PR_CAP_AMBIENT_RAISE		2
-# define PR_CAP_AMBIENT_LOWER		3
-# define PR_CAP_AMBIENT_CLEAR_ALL	4
-#endif
-
 #if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
 # define HAVE_GETAUXVAL
 #endif
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index af8c3f30b9c1..2732e0b29271 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -7,4 +7,5 @@ test_kill
 test_cpu
 test_cpuset
 test_zswap
+test_hugetlb_memcg
 wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index c27f05f6ce9b..00b441928909 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += test_kill
 TEST_GEN_PROGS += test_cpu
 TEST_GEN_PROGS += test_cpuset
 TEST_GEN_PROGS += test_zswap
+TEST_GEN_PROGS += test_hugetlb_memcg
 
 LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
 
@@ -27,3 +28,4 @@ $(OUTPUT)/test_kill: cgroup_util.c
 $(OUTPUT)/test_cpu: cgroup_util.c
 $(OUTPUT)/test_cpuset: cgroup_util.c
 $(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 4afb132e4e4f..a6e9848189d6 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -3,7 +3,7 @@
 #
 # Test for cpuset v2 partition root state (PRS)
 #
-# The sched verbose flag is set, if available, so that the console log
+# The sched verbose flag can be optionally set so that the console log
 # can be examined for the correct setting of scheduling domain.
 #
 
@@ -22,27 +22,27 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
 # Find cgroup v2 mount point
 CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
 [[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
+SUBPARTS_CPUS=$CGROUP2/.__DEBUG__.cpuset.cpus.subpartitions
+CPULIST=$(cat $CGROUP2/cpuset.cpus.effective)
 
-CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
-[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
+[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
 
 # Set verbose flag and delay factor
 PROG=$1
-VERBOSE=
+VERBOSE=0
 DELAY_FACTOR=1
 SCHED_DEBUG=
 while [[ "$1" = -* ]]
 do
 	case "$1" in
-		-v) VERBOSE=1
+		-v) ((VERBOSE++))
 		    # Enable sched/verbose can slow thing down
 		    [[ $DELAY_FACTOR -eq 1 ]] &&
 			DELAY_FACTOR=2
-		    break
 		    ;;
 		-d) DELAY_FACTOR=$2
 		    shift
-		    break
 		    ;;
 		*)  echo "Usage: $PROG [-v] [-d <delay-factor>"
 		    exit
@@ -52,7 +52,7 @@ do
 done
 
 # Set sched verbose flag if available when "-v" option is specified
-if [[ -n "$VERBOSE" && -d /sys/kernel/debug/sched ]]
+if [[ $VERBOSE -gt 0 && -d /sys/kernel/debug/sched ]]
 then
 	# Used to restore the original setting during cleanup
 	SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
@@ -61,14 +61,26 @@ fi
 
 cd $CGROUP2
 echo +cpuset > cgroup.subtree_control
+
+#
+# If cpuset has been set up and used in child cgroups, we may not be able to
+# create partition under root cgroup because of the CPU exclusivity rule.
+# So we are going to skip the test if this is the case.
+#
 [[ -d test ]] || mkdir test
-cd test
+echo 0-6 > test/cpuset.cpus
+echo root > test/cpuset.cpus.partition
+cat test/cpuset.cpus.partition | grep -q invalid
+RESULT=$?
+echo member > test/cpuset.cpus.partition
+echo "" > test/cpuset.cpus
+[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!"
 
 cleanup()
 {
 	online_cpus
+	cd $CGROUP2
 	rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
-	cd ..
 	rmdir test > /dev/null 2>&1
 	[[ -n "$SCHED_DEBUG" ]] &&
 		echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
@@ -103,7 +115,7 @@ test_partition()
 	[[ $? -eq 0 ]] || exit 1
 	ACTUAL_VAL=$(cat cpuset.cpus.partition)
 	[[ $ACTUAL_VAL != $EXPECTED_VAL ]] && {
-		echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $EXPECTED_VAL"
+		echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL"
 		echo "Test FAILED"
 		exit 1
 	}
@@ -114,7 +126,7 @@ test_effective_cpus()
 	EXPECTED_VAL=$1
 	ACTUAL_VAL=$(cat cpuset.cpus.effective)
 	[[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && {
-		echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$EXPECTED_VAL'"
+		echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'"
 		echo "Test FAILED"
 		exit 1
 	}
@@ -139,6 +151,7 @@ test_add_proc()
 #
 test_isolated()
 {
+	cd $CGROUP2/test
 	echo 2-3 > cpuset.cpus
 	TYPE=$(cat cpuset.cpus.partition)
 	[[ $TYPE = member ]] || echo member > cpuset.cpus.partition
@@ -203,125 +216,220 @@ test_isolated()
 #
 # Cgroup test hierarchy
 #
-# test -- A1 -- A2 -- A3
-#      \- B1
+# root -- A1 -- A2 -- A3
+#      +- B1
 #
-#  P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid)
-#  C<l> = add cpu-list
+#  P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
+#  C<l> = add cpu-list to cpuset.cpus
+#  X<l> = add cpu-list to cpuset.cpus.exclusive
 #  S<p> = use prefix in subtree_control
 #  T    = put a task into cgroup
-#  O<c>-<v> = Write <v> to CPU online file of <c>
+#  O<c>=<v> = Write <v> to CPU online file of <c>
 #
 SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
 TEST_MATRIX=(
-	# test  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
-	# ----  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
-	"  S+    C0-1     .      .    C2-3    S+    C4-5     .      .     0 A2:0-1"
-	"  S+    C0-1     .      .    C2-3    P1      .      .      .     0 "
-	"  S+    C0-1     .      .    C2-3   P1:S+ C0-1:P1   .      .     0 "
-	"  S+    C0-1     .      .    C2-3   P1:S+  C1:P1    .      .     0 "
-	"  S+   C0-1:S+   .      .    C2-3     .      .      .     P1     0 "
-	"  S+   C0-1:P1   .      .    C2-3    S+     C1      .      .     0 "
-	"  S+   C0-1:P1   .      .    C2-3    S+    C1:P1    .      .     0 "
-	"  S+   C0-1:P1   .      .    C2-3    S+    C1:P1    .     P1     0 "
-	"  S+   C0-1:P1   .      .    C2-3   C4-5     .      .      .     0 A1:4-5"
-	"  S+   C0-1:P1   .      .    C2-3  S+:C4-5   .      .      .     0 A1:4-5"
-	"  S+    C0-1     .      .   C2-3:P1   .      .      .     C2     0 "
-	"  S+    C0-1     .      .   C2-3:P1   .      .      .    C4-5    0 B1:4-5"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .      .      .      .      .     0 A1:0-1,A2:2-3"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .     C1-3    .      .      .     0 A1:1,A2:2-3"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     C3      .      .      .     0 A1:,A2:3 A1:P1,A2:P1"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     C3      P0     .      .     0 A1:3,A2:3 A1:P1,A2:P0"
-	"  S+ C2-3:P1:S+  C2:P1  .      .     C2-4    .      .      .     0 A1:3-4,A2:2"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     C3      .      .     C0-2   0 A1:,B1:0-2 A1:P1,A2:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .     C2-3    .      .      .     0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	#  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+	#  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+	"   C0-1     .      .    C2-3    S+    C4-5     .      .     0 A2:0-1"
+	"   C0-1     .      .    C2-3    P1      .      .      .     0 "
+	"   C0-1     .      .    C2-3   P1:S+ C0-1:P1   .      .     0 "
+	"   C0-1     .      .    C2-3   P1:S+  C1:P1    .      .     0 "
+	"  C0-1:S+   .      .    C2-3     .      .      .     P1     0 "
+	"  C0-1:P1   .      .    C2-3    S+     C1      .      .     0 "
+	"  C0-1:P1   .      .    C2-3    S+    C1:P1    .      .     0 "
+	"  C0-1:P1   .      .    C2-3    S+    C1:P1    .     P1     0 "
+	"  C0-1:P1   .      .    C2-3   C4-5     .      .      .     0 A1:4-5"
+	"  C0-1:P1   .      .    C2-3  S+:C4-5   .      .      .     0 A1:4-5"
+	"   C0-1     .      .   C2-3:P1   .      .      .     C2     0 "
+	"   C0-1     .      .   C2-3:P1   .      .      .    C4-5    0 B1:4-5"
+	"C0-3:P1:S+ C2-3:P1 .      .      .      .      .      .     0 A1:0-1,A2:2-3"
+	"C0-3:P1:S+ C2-3:P1 .      .     C1-3    .      .      .     0 A1:1,A2:2-3"
+	"C2-3:P1:S+  C3:P1  .      .     C3      .      .      .     0 A1:,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P1  .      .     C3      P0     .      .     0 A1:3,A2:3 A1:P1,A2:P0"
+	"C2-3:P1:S+  C2:P1  .      .     C2-4    .      .      .     0 A1:3-4,A2:2"
+	"C2-3:P1:S+  C3:P1  .      .     C3      .      .     C0-2   0 A1:,B1:0-2 A1:P1,A2:P1"
+	"$SETUP_A123_PARTITIONS    .     C2-3    .      .      .     0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
 
 	# CPU offlining cases:
-	"  S+    C0-1     .      .    C2-3    S+    C4-5     .     O2-0   0 A1:0-1,B1:3"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .     O2-0    .      .      .     0 A1:0-1,A2:3"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .     O2-0   O2-1    .      .     0 A1:0-1,A2:2-3"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .     O1-0    .      .      .     0 A1:0,A2:2-3"
-	"  S+ C0-3:P1:S+ C2-3:P1 .      .     O1-0   O1-1    .      .     0 A1:0-1,A2:2-3"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     O3-0   O3-1    .      .     0 A1:2,A2:3 A1:P1,A2:P1"
-	"  S+ C2-3:P1:S+  C3:P2  .      .     O3-0   O3-1    .      .     0 A1:2,A2:3 A1:P1,A2:P2"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     O2-0   O2-1    .      .     0 A1:2,A2:3 A1:P1,A2:P1"
-	"  S+ C2-3:P1:S+  C3:P2  .      .     O2-0   O2-1    .      .     0 A1:2,A2:3 A1:P1,A2:P2"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     O2-0    .      .      .     0 A1:,A2:3 A1:P1,A2:P1"
-	"  S+ C2-3:P1:S+  C3:P1  .      .     O3-0    .      .      .     0 A1:2,A2: A1:P1,A2:P1"
-	"  S+ C2-3:P1:S+  C3:P1  .      .    T:O2-0   .      .      .     0 A1:3,A2:3 A1:P1,A2:P-1"
-	"  S+ C2-3:P1:S+  C3:P1  .      .      .    T:O3-0   .      .     0 A1:2,A2:2 A1:P1,A2:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .     O1-0    .      .      .     0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .     O2-0    .      .      .     0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .     O3-0    .      .      .     0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .    T:O1-0   .      .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .      .    T:O2-0   .      .     0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .      .      .    T:O3-0   .     0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .    T:O1-0  O1-1    .      .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .      .    T:O2-0  O2-1    .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .      .      .    T:O3-0  O3-1   0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .    T:O1-0  O2-0   O1-1    .     0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
-	"  S+ $SETUP_A123_PARTITIONS    .    T:O1-0  O2-0   O2-1    .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
-
-	# test  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
-	# ----  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+	"   C0-1     .      .    C2-3    S+    C4-5     .     O2=0   0 A1:0-1,B1:3"
+	"C0-3:P1:S+ C2-3:P1 .      .     O2=0    .      .      .     0 A1:0-1,A2:3"
+	"C0-3:P1:S+ C2-3:P1 .      .     O2=0   O2=1    .      .     0 A1:0-1,A2:2-3"
+	"C0-3:P1:S+ C2-3:P1 .      .     O1=0    .      .      .     0 A1:0,A2:2-3"
+	"C0-3:P1:S+ C2-3:P1 .      .     O1=0   O1=1    .      .     0 A1:0-1,A2:2-3"
+	"C2-3:P1:S+  C3:P1  .      .     O3=0   O3=1    .      .     0 A1:2,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P2  .      .     O3=0   O3=1    .      .     0 A1:2,A2:3 A1:P1,A2:P2"
+	"C2-3:P1:S+  C3:P1  .      .     O2=0   O2=1    .      .     0 A1:2,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P2  .      .     O2=0   O2=1    .      .     0 A1:2,A2:3 A1:P1,A2:P2"
+	"C2-3:P1:S+  C3:P1  .      .     O2=0    .      .      .     0 A1:,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P1  .      .     O3=0    .      .      .     0 A1:2,A2: A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P1  .      .    T:O2=0   .      .      .     0 A1:3,A2:3 A1:P1,A2:P-1"
+	"C2-3:P1:S+  C3:P1  .      .      .    T:O3=0   .      .     0 A1:2,A2:2 A1:P1,A2:P-1"
+	"$SETUP_A123_PARTITIONS    .     O1=0    .      .      .     0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .     O2=0    .      .      .     0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .     O3=0    .      .      .     0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .    T:O1=0   .      .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+	"$SETUP_A123_PARTITIONS    .      .    T:O2=0   .      .     0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
+	"$SETUP_A123_PARTITIONS    .      .      .    T:O3=0   .     0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
+	"$SETUP_A123_PARTITIONS    .    T:O1=0  O1=1    .      .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .      .    T:O2=0  O2=1    .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .      .      .    T:O3=0  O3=1   0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .    T:O1=0  O2=0   O1=1    .     0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+	"$SETUP_A123_PARTITIONS    .    T:O1=0  O2=0   O2=1    .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+
+	#  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+	#  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+	#
+	# Remote partition and cpuset.cpus.exclusive tests
+	#
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3     .      .      .     0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3  X2-3:P2   .      .     0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3   X3:P2    .      .     0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3   X2-3  X2-3:P2   .     0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3   X2-3 X2-3:P2:C3 .     0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3"
+	" C0-3:S+ C1-3:S+ C2-3   C2-3     .      .      .      P2    0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+	" C0-3:S+ C1-3:S+ C2-3   C4-5     .      .      .      P2    0 B1:4-5 B1:P2 4-5"
+	" C0-3:S+ C1-3:S+ C2-3    C4    X2-3   X2-3  X2-3:P2   P2    0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+	" C0-3:S+ C1-3:S+ C2-3    C4    X2-3   X2-3 X2-3:P2:C1-3 P2  0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+	" C0-3:S+ C1-3:S+ C2-3    C4    X1-3  X1-3:P2   P2     .     0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
+	" C0-3:S+ C1-3:S+ C2-3    C4    X2-3   X2-3  X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
+
+	# Nested remote/local partition tests
+	" C0-3:S+ C1-3:S+ C2-3   C4-5   X2-3  X2-3:P1   P2     P1    0 A1:0-1,A2:,A3:2-3,B1:4-5 \
+								       A1:P0,A2:P1,A3:P2,B1:P1 2-3"
+	" C0-3:S+ C1-3:S+ C2-3    C4    X2-3  X2-3:P1   P2     P1    0 A1:0-1,A2:,A3:2-3,B1:4 \
+								       A1:P0,A2:P1,A3:P2,B1:P1 2-4"
+	" C0-3:S+ C1-3:S+  C3     C4    X2-3  X2-3:P1   P2     P1    0 A1:0-1,A2:2,A3:3,B1:4 \
+								       A1:P0,A2:P1,A3:P2,B1:P1 2-4"
+	" C0-4:S+ C1-4:S+ C2-4     .    X2-4  X2-4:P2  X4:P1    .    0 A1:0-1,A2:2-3,A3:4 \
+								       A1:P0,A2:P2,A3:P1 2-4"
+	" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+				   .      .      X5      .      .    0 A1:0-4,A2:1-4,A3:2-4 \
+								       A1:P0,A2:P-2,A3:P-1 ."
+	" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+				   .      .      .      X1      .    0 A1:0-1,A2:2-4,A3:2-4 \
+								       A1:P0,A2:P2,A3:P-1 2-4"
+
+	# Remote partition offline tests
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3   X2-3 X2-3:P2:O2=0 .   0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
+	" C0-3:S+ C1-3:S+ C2-3     .    X2-3   X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+	" C0-3:S+ C1-3:S+  C3      .    X2-3   X2-3    P2:O3=0   .   0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
+	" C0-3:S+ C1-3:S+  C3      .    X2-3   X2-3   T:P2:O3=0  .   0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3"
+
+	# An invalidated remote partition cannot self-recover from hotplug
+	" C0-3:S+ C1-3:S+  C2      .    X2-3   X2-3   T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+
+	# cpus.exclusive.effective clearing test
+	" C0-3:S+ C1-3:S+  C2      .   X2-3:X    .      .      .     0 A1:0-3,A2:1-3,A3:2,XA1:"
+
+	# Invalid to valid remote partition transition test
+	" C0-3:S+   C1-3    .      .      .    X3:P2    .      .     0 A1:0-3,A2:1-3,XA2: A2:P-2"
+	" C0-3:S+ C1-3:X3:P2
+			    .      .    X2-3    P2      .      .     0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+
+	# Invalid to valid local partition direct transition tests
+	" C1-3:S+:P2 C2-3:X1:P2 .  .      .      .      .      .     0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3"
+	" C1-3:S+:P2 C2-3:X1:P2 .  .      .    X3:P2    .      .     0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
+	"  C0-3:P2   .      .    C4-6   C0-4     .      .      .     0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
+	"  C0-3:P2   .      .    C4-6 C0-4:C0-3  .      .      .     0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
+	"  C0-3:P2   .      .  C3-5:C4-5  .      .      .      .     0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+
+	# Local partition invalidation tests
+	" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+				   .      .      .      .      .     0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+	" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+				   .      .     X4      .      .     0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+	" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+				   .      .     C4      .      .     0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+	# Local partition CPU change tests
+	" C0-5:S+:P2 C4-5:S+:P1 .  .      .    C3-5     .      .     0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
+	" C0-5:S+:P2 C4-5:S+:P1 .  .    C1-5     .      .      .     0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+
+	# cpus_allowed/exclusive_cpus update tests
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+				   .     C4      .      P2     .     0 A1:4,A2:4,XA2:,XA3:,A3:4 \
+								       A1:P0,A3:P-2 ."
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+				   .     X1      .      P2     .     0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
+								       A1:P0,A3:P-2 ."
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+				   .      .     C3      P2     .     0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \
+								       A1:P0,A3:P2 3"
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+				   .      .     X3      P2     .     0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
+								       A1:P0,A3:P2 3"
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+				   .      .     X3      .      .     0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
+								       A1:P0,A3:P-2 ."
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+				   .      .     C3      .      .     0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \
+								       A1:P0,A3:P-2 ."
+	" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+				   .     C4      .      .      .     0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \
+								       A1:P0,A3:P-2 ."
+
+	#  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+	#  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
 	#
 	# Incorrect change to cpuset.cpus invalidates partition root
 	#
 	# Adding CPUs to partition root that are not in parent's
 	# cpuset.cpus is allowed, but those extra CPUs are ignored.
-	"  S+ C2-3:P1:S+ C3:P1   .      .      .     C2-4    .      .     0 A1:,A2:2-3 A1:P1,A2:P1"
+	"C2-3:P1:S+ C3:P1   .      .      .     C2-4    .      .     0 A1:,A2:2-3 A1:P1,A2:P1"
 
 	# Taking away all CPUs from parent or itself if there are tasks
 	# will make the partition invalid.
-	"  S+ C2-3:P1:S+  C3:P1  .      .      T     C2-3    .      .     0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
-	"  S+  C3:P1:S+    C3    .      .      T      P1     .      .     0 A1:3,A2:3 A1:P1,A2:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .    T:C2-3   .      .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    . T:C2-3:C1-3 .      .      .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+	"C2-3:P1:S+  C3:P1  .      .      T     C2-3    .      .     0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
+	" C3:P1:S+    C3    .      .      T      P1     .      .     0 A1:3,A2:3 A1:P1,A2:P-1"
+	"$SETUP_A123_PARTITIONS    .    T:C2-3   .      .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+	"$SETUP_A123_PARTITIONS    . T:C2-3:C1-3 .      .      .     0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
 
 	# Changing a partition root to member makes child partitions invalid
-	"  S+ C2-3:P1:S+  C3:P1  .      .      P0     .      .      .     0 A1:2-3,A2:3 A1:P0,A2:P-1"
-	"  S+ $SETUP_A123_PARTITIONS    .     C2-3    P0     .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+	"C2-3:P1:S+  C3:P1  .      .      P0     .      .      .     0 A1:2-3,A2:3 A1:P0,A2:P-1"
+	"$SETUP_A123_PARTITIONS    .     C2-3    P0     .      .     0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
 
 	# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
 	# as they overlap.
-	"  S+ C2-3:P1:S+  .      .      .      .   C3-4:P1   .      .     0 A1:2,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  .      .      .      .   C3-4:P1   .      .     0 A1:2,A2:3 A1:P1,A2:P1"
 
 	# Deletion of CPUs distributed to child cgroup is allowed.
-	"  S+ C0-1:P1:S+ C1      .    C2-3   C4-5     .      .      .     0 A1:4-5,A2:4-5"
+	"C0-1:P1:S+ C1      .    C2-3   C4-5     .      .      .     0 A1:4-5,A2:4-5"
 
 	# To become a valid partition root, cpuset.cpus must overlap parent's
 	# cpuset.cpus.
-	"  S+   C0-1:P1   .      .    C2-3    S+   C4-5:P1   .      .     0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+	"  C0-1:P1   .      .    C2-3    S+   C4-5:P1   .      .     0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
 
 	# Enabling partition with child cpusets is allowed
-	"  S+   C0-1:S+  C1      .    C2-3    P1      .      .      .     0 A1:0-1,A2:1 A1:P1"
+	"  C0-1:S+  C1      .    C2-3    P1      .      .      .     0 A1:0-1,A2:1 A1:P1"
 
 	# A partition root with non-partition root parent is invalid, but it
 	# can be made valid if its parent becomes a partition root too.
-	"  S+   C0-1:S+  C1      .    C2-3     .      P2     .      .     0 A1:0-1,A2:1 A1:P0,A2:P-2"
-	"  S+   C0-1:S+ C1:P2    .    C2-3     P1     .      .      .     0 A1:0,A2:1 A1:P1,A2:P2"
+	"  C0-1:S+  C1      .    C2-3     .      P2     .      .     0 A1:0-1,A2:1 A1:P0,A2:P-2"
+	"  C0-1:S+ C1:P2    .    C2-3     P1     .      .      .     0 A1:0,A2:1 A1:P1,A2:P2"
 
 	# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
-	"  S+   C0-1:P1   .      .    C2-3   C0-2     .      .      .     0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
-	"  S+   C0-1:P1   .      .  P1:C2-3  C0-2   .      .      .     0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
-	"  S+    C0-1     .      .  P1:C2-3  C0-2   .      .      .     0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+	"  C0-1:P1   .      .    C2-3   C0-2     .      .      .     0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
+	"  C0-1:P1   .      .  P1:C2-3  C0-2     .      .      .     0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
+	"   C0-1     .      .  P1:C2-3  C0-2     .      .      .     0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
 
-	# test  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
-	# ----  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+	#  old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+	#  ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
 	# Failure cases:
 
 	# A task cannot be added to a partition with no cpu
-	"  S+ C2-3:P1:S+  C3:P1  .      .    O2-0:T   .      .      .     1 A1:,A2:3 A1:P1,A2:P1"
+	"C2-3:P1:S+  C3:P1  .      .    O2=0:T   .      .      .     1 A1:,A2:3 A1:P1,A2:P1"
+
+	# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
+	"   C0-3     .      .    C4-5   X0-3     .      .     X3-5   1 A1:0-3,B1:4-5"
 )
 
 #
 # Write to the cpu online file
-#  $1 - <c>-<v> where <c> = cpu number, <v> value to be written
+#  $1 - <c>=<v> where <c> = cpu number, <v> value to be written
 #
 write_cpu_online()
 {
-	CPU=${1%-*}
-	VAL=${1#*-}
+	CPU=${1%=*}
+	VAL=${1#*=}
 	CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online
 	if [[ $VAL -eq 0 ]]
 	then
@@ -349,11 +457,12 @@ set_ctrl_state()
 	TMPMSG=/tmp/.msg_$$
 	CGRP=$1
 	STATE=$2
-	SHOWERR=${3}${VERBOSE}
+	SHOWERR=${3}
 	CTRL=${CTRL:=$CONTROLLER}
 	HASERR=0
 	REDIRECT="2> $TMPMSG"
 	[[ -z "$STATE" || "$STATE" = '.' ]] && return 0
+	[[ $VERBOSE -gt 0 ]] && SHOWERR=1
 
 	rm -f $TMPMSG
 	for CMD in $(echo $STATE | sed -e "s/:/ /g")
@@ -362,12 +471,18 @@ set_ctrl_state()
 		SFILE=$CGRP/cgroup.subtree_control
 		PFILE=$CGRP/cpuset.cpus.partition
 		CFILE=$CGRP/cpuset.cpus
+		XFILE=$CGRP/cpuset.cpus.exclusive
 		S=$(expr substr $CMD 1 1)
 		if [[ $S = S ]]
 		then
 			PREFIX=${CMD#?}
 			COMM="echo ${PREFIX}${CTRL} > $SFILE"
 			eval $COMM $REDIRECT
+		elif [[ $S = X ]]
+		then
+			CPUS=${CMD#?}
+			COMM="echo $CPUS > $XFILE"
+			eval $COMM $REDIRECT
 		elif [[ $S = C ]]
 		then
 			CPUS=${CMD#?}
@@ -430,7 +545,7 @@ online_cpus()
 	[[ -n "OFFLINE_CPUS" ]] && {
 		for C in $OFFLINE_CPUS
 		do
-			write_cpu_online ${C}-1
+			write_cpu_online ${C}=1
 		done
 	}
 }
@@ -443,18 +558,27 @@ reset_cgroup_states()
 	echo 0 > $CGROUP2/cgroup.procs
 	online_cpus
 	rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
-	set_ctrl_state . S-
+	pause 0.02
+	set_ctrl_state . R-
 	pause 0.01
 }
 
 dump_states()
 {
-	for DIR in A1 A1/A2 A1/A2/A3 B1
+	for DIR in . A1 A1/A2 A1/A2/A3 B1
 	do
+		CPUS=$DIR/cpuset.cpus
 		ECPUS=$DIR/cpuset.cpus.effective
+		XCPUS=$DIR/cpuset.cpus.exclusive
+		XECPUS=$DIR/cpuset.cpus.exclusive.effective
 		PRS=$DIR/cpuset.cpus.partition
-		[[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
-		[[ -e $PRS   ]] && echo "$PRS: $(cat $PRS)"
+		PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions
+		[[ -e $CPUS   ]] && echo "$CPUS: $(cat $CPUS)"
+		[[ -e $XCPUS  ]] && echo "$XCPUS: $(cat $XCPUS)"
+		[[ -e $ECPUS  ]] && echo "$ECPUS: $(cat $ECPUS)"
+		[[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)"
+		[[ -e $PRS    ]] && echo "$PRS: $(cat $PRS)"
+		[[ -e $PCPUS  ]] && echo "$PCPUS: $(cat $PCPUS)"
 	done
 }
 
@@ -470,11 +594,17 @@ check_effective_cpus()
 		set -- $(echo $CHK | sed -e "s/:/ /g")
 		CGRP=$1
 		CPUS=$2
+		if [[ $CGRP = X* ]]
+		then
+			CGRP=${CGRP#X}
+			FILE=cpuset.cpus.exclusive.effective
+		else
+			FILE=cpuset.cpus.effective
+		fi
 		[[ $CGRP = A2 ]] && CGRP=A1/A2
 		[[ $CGRP = A3 ]] && CGRP=A1/A2/A3
-		FILE=$CGRP/cpuset.cpus.effective
-		[[ -e $FILE ]] || return 1
-		[[ $CPUS = $(cat $FILE) ]] || return 1
+		[[ -e $CGRP/$FILE ]] || return 1
+		[[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
 	done
 }
 
@@ -525,6 +655,65 @@ check_cgroup_states()
 }
 
 #
+# Get isolated (including offline) CPUs by looking at
+# /sys/kernel/debug/sched/domains and compare that with the expected value.
+#
+# Note that a sched domain of just 1 CPU will be considered isolated.
+#
+# $1 - expected isolated cpu list
+#
+check_isolcpus()
+{
+	EXPECT_VAL=$1
+	ISOLCPUS=
+	LASTISOLCPU=
+	SCHED_DOMAINS=/sys/kernel/debug/sched/domains
+	[[ -d $SCHED_DOMAINS ]] || return 0
+	[[ $EXPECT_VAL = . ]] && EXPECT_VAL=
+
+	for ((CPU=0; CPU < $NR_CPUS; CPU++))
+	do
+		[[ -n "$(ls ${SCHED_DOMAINS}/cpu$CPU)" ]] && continue
+
+		if [[ -z "$LASTISOLCPU" ]]
+		then
+			ISOLCPUS=$CPU
+			LASTISOLCPU=$CPU
+		elif [[ "$LASTISOLCPU" -eq $((CPU - 1)) ]]
+		then
+			echo $ISOLCPUS | grep -q "\<$LASTISOLCPU\$"
+			if [[ $? -eq 0 ]]
+			then
+				ISOLCPUS=${ISOLCPUS}-
+			fi
+			LASTISOLCPU=$CPU
+		else
+			if [[ $ISOLCPUS = *- ]]
+			then
+				ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+			fi
+			ISOLCPUS=${ISOLCPUS},$CPU
+			LASTISOLCPU=$CPU
+		fi
+	done
+	[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+	[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+}
+
+test_fail()
+{
+	TESTNUM=$1
+	TESTTYPE=$2
+	ADDINFO=$3
+	echo "Test $TEST[$TESTNUM] failed $TESTTYPE check!"
+	[[ -n "$ADDINFO" ]] && echo "*** $ADDINFO ***"
+	eval echo \${$TEST[$I]}
+	echo
+	dump_states
+	exit 1
+}
+
+#
 # Run cpuset state transition test
 #  $1 - test matrix name
 #
@@ -536,88 +725,83 @@ run_state_test()
 {
 	TEST=$1
 	CONTROLLER=cpuset
-	CPULIST=0-6
 	I=0
 	eval CNT="\${#$TEST[@]}"
 
 	reset_cgroup_states
-	echo $CPULIST > cpuset.cpus
-	echo root > cpuset.cpus.partition
 	console_msg "Running state transition test ..."
 
 	while [[ $I -lt $CNT ]]
 	do
 		echo "Running test $I ..." > /dev/console
+		[[ $VERBOSE -gt 1 ]] && {
+			echo ""
+			eval echo \${$TEST[$I]}
+		}
 		eval set -- "\${$TEST[$I]}"
-		ROOT=$1
-		OLD_A1=$2
-		OLD_A2=$3
-		OLD_A3=$4
-		OLD_B1=$5
-		NEW_A1=$6
-		NEW_A2=$7
-		NEW_A3=$8
-		NEW_B1=$9
-		RESULT=${10}
-		ECPUS=${11}
-		STATES=${12}
-
-		set_ctrl_state_noerr .        $ROOT
+		OLD_A1=$1
+		OLD_A2=$2
+		OLD_A3=$3
+		OLD_B1=$4
+		NEW_A1=$5
+		NEW_A2=$6
+		NEW_A3=$7
+		NEW_B1=$8
+		RESULT=$9
+		ECPUS=${10}
+		STATES=${11}
+		ICPUS=${12}
+
+		set_ctrl_state_noerr B1       $OLD_B1
 		set_ctrl_state_noerr A1       $OLD_A1
 		set_ctrl_state_noerr A1/A2    $OLD_A2
 		set_ctrl_state_noerr A1/A2/A3 $OLD_A3
-		set_ctrl_state_noerr B1       $OLD_B1
 		RETVAL=0
 		set_ctrl_state A1       $NEW_A1; ((RETVAL += $?))
 		set_ctrl_state A1/A2    $NEW_A2; ((RETVAL += $?))
 		set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?))
 		set_ctrl_state B1       $NEW_B1; ((RETVAL += $?))
 
-		[[ $RETVAL -ne $RESULT ]] && {
-			echo "Test $TEST[$I] failed result check!"
-			eval echo \"\${$TEST[$I]}\"
-			dump_states
-			exit 1
-		}
+		[[ $RETVAL -ne $RESULT ]] && test_fail $I result
 
 		[[ -n "$ECPUS" && "$ECPUS" != . ]] && {
 			check_effective_cpus $ECPUS
-			[[ $? -ne 0 ]] && {
-				echo "Test $TEST[$I] failed effective CPU check!"
-				eval echo \"\${$TEST[$I]}\"
-				echo
-				dump_states
-				exit 1
-			}
+			[[ $? -ne 0 ]] && test_fail $I "effective CPU"
 		}
 
-		[[ -n "$STATES" ]] && {
+		[[ -n "$STATES" && "$STATES" != . ]] && {
 			check_cgroup_states $STATES
-			[[ $? -ne 0 ]] && {
-				echo "FAILED: Test $TEST[$I] failed states check!"
-				eval echo \"\${$TEST[$I]}\"
-				echo
-				dump_states
-				exit 1
-			}
+			[[ $? -ne 0 ]] && test_fail $I states
 		}
 
+		# Compare the expected isolated CPUs with the actual ones,
+		# if available
+		[[ -n "$ICPUS" ]] && {
+			check_isolcpus $ICPUS
+			[[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
+				"Expect $ICPUS, get $ISOLCPUS instead"
+		}
 		reset_cgroup_states
 		#
 		# Check to see if effective cpu list changes
 		#
-		pause 0.05
 		NEWLIST=$(cat cpuset.cpus.effective)
+		RETRY=0
+		while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]]
+		do
+			# Wait a bit longer & recheck a few times
+			pause 0.01
+			((RETRY++))
+			NEWLIST=$(cat cpuset.cpus.effective)
+		done
 		[[ $NEWLIST != $CPULIST ]] && {
 			echo "Effective cpus changed to $NEWLIST after test $I!"
 			exit 1
 		}
-		[[ -n "$VERBOSE" ]] && echo "Test $I done."
+		[[ $VERBOSE -gt 0 ]] && echo "Test $I done."
 		((I++))
 	done
 	echo "All $I tests of $TEST PASSED."
-
-	echo member > cpuset.cpus.partition
 }
 
 #
@@ -642,6 +826,7 @@ test_inotify()
 {
 	ERR=0
 	PRS=/tmp/.prs_$$
+	cd $CGROUP2/test
 	[[ -f $WAIT_INOTIFY ]] || {
 		echo "wait_inotify not found, inotify test SKIPPED."
 		return
@@ -655,7 +840,7 @@ test_inotify()
 	rm -f $PRS
 	wait_inotify $PWD/cpuset.cpus.partition $PRS &
 	pause 0.01
-	set_ctrl_state . "O1-0"
+	set_ctrl_state . "O1=0"
 	pause 0.01
 	check_cgroup_states ".:P-1"
 	if [[ $? -ne 0 ]]
@@ -689,5 +874,3 @@ run_state_test TEST_MATRIX
 test_isolated
 test_inotify
 echo "All tests PASSED."
-cd ..
-rmdir test
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
new file mode 100644
index 000000000000..f0fefeb4cc24
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+#define ADDR ((void *)(0x0UL))
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+/* mapping 8 MBs == 4 hugepages */
+#define LENGTH (8UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* borrowed from mm/hmm-tests.c */
+static long get_hugepage_size(void)
+{
+	int fd;
+	char buf[2048];
+	int len;
+	char *p, *q, *path = "/proc/meminfo", *tag = "Hugepagesize:";
+	long val;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		/* Error opening the file */
+		return -1;
+	}
+
+	len = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (len < 0) {
+		/* Error in reading the file */
+		return -1;
+	}
+	if (len == sizeof(buf)) {
+		/* Error file is too large */
+		return -1;
+	}
+	buf[len] = '\0';
+
+	/* Search for a tag if provided */
+	if (tag) {
+		p = strstr(buf, tag);
+		if (!p)
+			return -1; /* looks like the line we want isn't there */
+		p += strlen(tag);
+	} else
+		p = buf;
+
+	val = strtol(p, &q, 0);
+	if (*q != ' ') {
+		/* Error parsing the file */
+		return -1;
+	}
+
+	return val;
+}
+
+static int set_file(const char *path, long value)
+{
+	FILE *file;
+	int ret;
+
+	file = fopen(path, "w");
+	if (!file)
+		return -1;
+	ret = fprintf(file, "%ld\n", value);
+	fclose(file);
+	return ret;
+}
+
+static int set_nr_hugepages(long value)
+{
+	return set_file("/proc/sys/vm/nr_hugepages", value);
+}
+
+static unsigned int check_first(char *addr)
+{
+	return *(unsigned int *)addr;
+}
+
+static void write_data(char *addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < LENGTH; i++)
+		*(addr + i) = (char)i;
+}
+
+static int hugetlb_test_program(const char *cgroup, void *arg)
+{
+	char *test_group = (char *)arg;
+	void *addr;
+	long old_current, expected_current, current;
+	int ret = EXIT_FAILURE;
+
+	old_current = cg_read_long(test_group, "memory.current");
+	set_nr_hugepages(20);
+	current = cg_read_long(test_group, "memory.current");
+	if (current - old_current >= MB(2)) {
+		ksft_print_msg(
+			"setting nr_hugepages should not increase hugepage usage.\n");
+		ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+		return EXIT_FAILURE;
+	}
+
+	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+	if (addr == MAP_FAILED) {
+		ksft_print_msg("fail to mmap.\n");
+		return EXIT_FAILURE;
+	}
+	current = cg_read_long(test_group, "memory.current");
+	if (current - old_current >= MB(2)) {
+		ksft_print_msg("mmap should not increase hugepage usage.\n");
+		ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+		goto out_failed_munmap;
+	}
+	old_current = current;
+
+	/* read the first page */
+	check_first(addr);
+	expected_current = old_current + MB(2);
+	current = cg_read_long(test_group, "memory.current");
+	if (!values_close(expected_current, current, 5)) {
+		ksft_print_msg("memory usage should increase by around 2MB.\n");
+		ksft_print_msg(
+			"expected memory: %ld, actual memory: %ld\n",
+			expected_current, current);
+		goto out_failed_munmap;
+	}
+
+	/* write to the whole range */
+	write_data(addr);
+	current = cg_read_long(test_group, "memory.current");
+	expected_current = old_current + MB(8);
+	if (!values_close(expected_current, current, 5)) {
+		ksft_print_msg("memory usage should increase by around 8MB.\n");
+		ksft_print_msg(
+			"expected memory: %ld, actual memory: %ld\n",
+			expected_current, current);
+		goto out_failed_munmap;
+	}
+
+	/* unmap the whole range */
+	munmap(addr, LENGTH);
+	current = cg_read_long(test_group, "memory.current");
+	expected_current = old_current;
+	if (!values_close(expected_current, current, 5)) {
+		ksft_print_msg("memory usage should go back down.\n");
+		ksft_print_msg(
+			"expected memory: %ld, actual memory: %ld\n",
+			expected_current, current);
+		return ret;
+	}
+
+	ret = EXIT_SUCCESS;
+	return ret;
+
+out_failed_munmap:
+	munmap(addr, LENGTH);
+	return ret;
+}
+
+static int test_hugetlb_memcg(char *root)
+{
+	int ret = KSFT_FAIL;
+	char *test_group;
+
+	test_group = cg_name(root, "hugetlb_memcg_test");
+	if (!test_group || cg_create(test_group)) {
+		ksft_print_msg("fail to create cgroup.\n");
+		goto out;
+	}
+
+	if (cg_write(test_group, "memory.max", "100M")) {
+		ksft_print_msg("fail to set cgroup memory limit.\n");
+		goto out;
+	}
+
+	/* disable swap */
+	if (cg_write(test_group, "memory.swap.max", "0")) {
+		ksft_print_msg("fail to disable swap.\n");
+		goto out;
+	}
+
+	if (!cg_run(test_group, hugetlb_test_program, (void *)test_group))
+		ret = KSFT_PASS;
+out:
+	cg_destroy(test_group);
+	free(test_group);
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	char root[PATH_MAX];
+	int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
+
+	has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
+	if (has_memory_hugetlb_acc < 0)
+		ksft_exit_skip("Failed to query cgroup mount option\n");
+	else if (!has_memory_hugetlb_acc)
+		ksft_exit_skip("memory hugetlb accounting is disabled\n");
+
+	/* Unit is kB! */
+	if (get_hugepage_size() != 2048) {
+		ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
+		ksft_test_result_skip("test_hugetlb_memcg\n");
+		return ret;
+	}
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	switch (test_hugetlb_memcg(root)) {
+	case KSFT_PASS:
+		ksft_test_result_pass("test_hugetlb_memcg\n");
+		break;
+	case KSFT_SKIP:
+		ksft_test_result_skip("test_hugetlb_memcg\n");
+		break;
+	default:
+		ret = EXIT_FAILURE;
+		ksft_test_result_fail("test_hugetlb_memcg\n");
+		break;
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index 49def87a909b..c99d2adaca3f 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -55,6 +55,11 @@ static int get_zswap_written_back_pages(size_t *value)
 	return read_int("/sys/kernel/debug/zswap/written_back_pages", value);
 }
 
+static long get_zswpout(const char *cgroup)
+{
+	return cg_read_key_long(cgroup, "memory.stat", "zswpout ");
+}
+
 static int allocate_bytes(const char *cgroup, void *arg)
 {
 	size_t size = (size_t)arg;
@@ -69,6 +74,48 @@ static int allocate_bytes(const char *cgroup, void *arg)
 }
 
 /*
+ * Sanity test to check that pages are written into zswap.
+ */
+static int test_zswap_usage(const char *root)
+{
+	long zswpout_before, zswpout_after;
+	int ret = KSFT_FAIL;
+	char *test_group;
+
+	/* Set up */
+	test_group = cg_name(root, "no_shrink_test");
+	if (!test_group)
+		goto out;
+	if (cg_create(test_group))
+		goto out;
+	if (cg_write(test_group, "memory.max", "1M"))
+		goto out;
+
+	zswpout_before = get_zswpout(test_group);
+	if (zswpout_before < 0) {
+		ksft_print_msg("Failed to get zswpout\n");
+		goto out;
+	}
+
+	/* Allocate more than memory.max to push memory into zswap */
+	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
+		goto out;
+
+	/* Verify that pages come into zswap */
+	zswpout_after = get_zswpout(test_group);
+	if (zswpout_after <= zswpout_before) {
+		ksft_print_msg("zswpout does not increase after test program\n");
+		goto out;
+	}
+	ret = KSFT_PASS;
+
+out:
+	cg_destroy(test_group);
+	free(test_group);
+	return ret;
+}
+
+/*
  * When trying to store a memcg page in zswap, if the memcg hits its memory
  * limit in zswap, writeback should not be triggered.
  *
@@ -235,6 +282,7 @@ struct zswap_test {
 	int (*fn)(const char *root);
 	const char *name;
 } tests[] = {
+	T(test_zswap_usage),
 	T(test_no_kmem_bypass),
 	T(test_no_invasive_cgroup_shrink),
 };
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index e60cf4da8fb0..3c9bf0cd82a8 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -7,6 +7,7 @@
 #include <inttypes.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -103,8 +104,8 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 	return 0;
 }
 
-static void test_clone3(uint64_t flags, size_t size, int expected,
-		       enum test_mode test_mode)
+static bool test_clone3(uint64_t flags, size_t size, int expected,
+			enum test_mode test_mode)
 {
 	int ret;
 
@@ -114,92 +115,223 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
 	ret = call_clone3(flags, size, test_mode);
 	ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
 			getpid(), ret, expected);
-	if (ret != expected)
-		ksft_test_result_fail(
+	if (ret != expected) {
+		ksft_print_msg(
 			"[%d] Result (%d) is different than expected (%d)\n",
 			getpid(), ret, expected);
-	else
-		ksft_test_result_pass(
-			"[%d] Result (%d) matches expectation (%d)\n",
-			getpid(), ret, expected);
-}
-
-int main(int argc, char *argv[])
-{
-	uid_t uid = getuid();
-
-	ksft_print_header();
-	ksft_set_plan(19);
-	test_clone3_supported();
-
-	/* Just a simple clone3() should return 0.*/
-	test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
-
-	/* Do a clone3() in a new PID NS.*/
-	if (uid == 0)
-		test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
-	else
-		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
-
-	/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
-	test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
+		return false;
+	}
 
-	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
-	test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+	return true;
+}
 
-	/* Do a clone3() with sizeof(struct clone_args) + 8 */
-	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+typedef bool (*filter_function)(void);
+typedef size_t (*size_function)(void);
 
-	/* Do a clone3() with exit_signal having highest 32 bits non-zero */
-	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+static bool not_root(void)
+{
+	if (getuid() != 0) {
+		ksft_print_msg("Not running as root\n");
+		return true;
+	}
 
-	/* Do a clone3() with negative 32-bit exit_signal */
-	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+	return false;
+}
 
-	/* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
-	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+static bool no_timenamespace(void)
+{
+	if (not_root())
+		return true;
 
-	/* Do a clone3() with NSIG < exit_signal < CSIG */
-	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+	if (!access("/proc/self/ns/time", F_OK))
+		return false;
 
-	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+	ksft_print_msg("Time namespaces are not supported\n");
+	return true;
+}
 
-	test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
-			CLONE3_ARGS_ALL_0);
+static size_t page_size_plus_8(void)
+{
+	return getpagesize() + 8;
+}
 
-	test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
-			CLONE3_ARGS_ALL_0);
+struct test {
+	const char *name;
+	uint64_t flags;
+	size_t size;
+	size_function size_function;
+	int expected;
+	enum test_mode test_mode;
+	filter_function filter;
+};
 
-	/* Do a clone3() with > page size */
-	test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+static const struct test tests[] = {
+	{
+		.name = "simple clone3()",
+		.flags = 0,
+		.size = 0,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "clone3() in a new PID_NS",
+		.flags = CLONE_NEWPID,
+		.size = 0,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+		.filter = not_root,
+	},
+	{
+		.name = "CLONE_ARGS_SIZE_VER0",
+		.flags = 0,
+		.size = CLONE_ARGS_SIZE_VER0,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "CLONE_ARGS_SIZE_VER0 - 8",
+		.flags = 0,
+		.size = CLONE_ARGS_SIZE_VER0 - 8,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "sizeof(struct clone_args) + 8",
+		.flags = 0,
+		.size = sizeof(struct __clone_args) + 8,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "exit_signal with highest 32 bits non-zero",
+		.flags = 0,
+		.size = 0,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+	},
+	{
+		.name = "negative 32-bit exit_signal",
+		.flags = 0,
+		.size = 0,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+	},
+	{
+		.name = "exit_signal not fitting into CSIGNAL mask",
+		.flags = 0,
+		.size = 0,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+	},
+	{
+		.name = "NSIG < exit_signal < CSIG",
+		.flags = 0,
+		.size = 0,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+	},
+	{
+		.name = "Arguments sizeof(struct clone_args) + 8",
+		.flags = 0,
+		.size = sizeof(struct __clone_args) + 8,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_ALL_0,
+	},
+	{
+		.name = "Arguments sizeof(struct clone_args) + 16",
+		.flags = 0,
+		.size = sizeof(struct __clone_args) + 16,
+		.expected = -E2BIG,
+		.test_mode = CLONE3_ARGS_ALL_0,
+	},
+	{
+		.name = "Arguments sizeof(struct clone_arg) * 2",
+		.flags = 0,
+		.size = sizeof(struct __clone_args) + 16,
+		.expected = -E2BIG,
+		.test_mode = CLONE3_ARGS_ALL_0,
+	},
+	{
+		.name = "Arguments > page size",
+		.flags = 0,
+		.size_function = page_size_plus_8,
+		.expected = -E2BIG,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "CLONE_ARGS_SIZE_VER0 in a new PID NS",
+		.flags = CLONE_NEWPID,
+		.size = CLONE_ARGS_SIZE_VER0,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+		.filter = not_root,
+	},
+	{
+		.name = "CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS",
+		.flags = CLONE_NEWPID,
+		.size = CLONE_ARGS_SIZE_VER0 - 8,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "sizeof(struct clone_args) + 8 in a new PID NS",
+		.flags = CLONE_NEWPID,
+		.size = sizeof(struct __clone_args) + 8,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+		.filter = not_root,
+	},
+	{
+		.name = "Arguments > page size in a new PID NS",
+		.flags = CLONE_NEWPID,
+		.size_function = page_size_plus_8,
+		.expected = -E2BIG,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+	{
+		.name = "New time NS",
+		.flags = CLONE_NEWTIME,
+		.size = 0,
+		.expected = 0,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+		.filter = no_timenamespace,
+	},
+	{
+		.name = "exit signal (SIGCHLD) in flags",
+		.flags = SIGCHLD,
+		.size = 0,
+		.expected = -EINVAL,
+		.test_mode = CLONE3_ARGS_NO_TEST,
+	},
+};
 
-	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
-	if (uid == 0)
-		test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
-				CLONE3_ARGS_NO_TEST);
-	else
-		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+int main(int argc, char *argv[])
+{
+	size_t size;
+	int i;
 
-	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
-	test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
-			CLONE3_ARGS_NO_TEST);
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(tests));
+	test_clone3_supported();
 
-	/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
-	if (uid == 0)
-		test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
-				CLONE3_ARGS_NO_TEST);
-	else
-		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (tests[i].filter && tests[i].filter()) {
+			ksft_test_result_skip("%s\n", tests[i].name);
+			continue;
+		}
 
-	/* Do a clone3() with > page size in a new PID NS */
-	test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
-			CLONE3_ARGS_NO_TEST);
+		if (tests[i].size_function)
+			size = tests[i].size_function();
+		else
+			size = tests[i].size;
 
-	/* Do a clone3() in a new time namespace */
-	test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST);
+		ksft_print_msg("Running test '%s'\n", tests[i].name);
 
-	/* Do a clone3() with exit signal (SIGCHLD) in flags */
-	test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST);
+		ksft_test_result(test_clone3(tests[i].flags, size,
+					     tests[i].expected,
+					     tests[i].test_mode),
+				 "%s\n", tests[i].name);
+	}
 
 	ksft_finished();
 }
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 52d3f0364bda..31b56d625655 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -27,9 +27,7 @@
 #include "../kselftest_harness.h"
 #include "clone3_selftests.h"
 
-#ifndef MAX_PID_NS_LEVEL
 #define MAX_PID_NS_LEVEL 32
-#endif
 
 static void child_exit(int ret)
 {
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 47a8c0fc3676..54a8b2445be9 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -16,10 +16,6 @@
 #include "../kselftest.h"
 #include "clone3_selftests.h"
 
-#ifndef CLONE_CLEAR_SIGHAND
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL
-#endif
-
 static void nop_handler(int signo)
 {
 }
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index e81ffaaee02b..3d2663fe50ba 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -15,10 +15,6 @@
 
 #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
 
-#ifndef CLONE_INTO_CGROUP
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-#endif
-
 #ifndef __NR_clone3
 #define __NR_clone3 -1
 #endif
@@ -32,18 +28,9 @@ struct __clone_args {
 	__aligned_u64 stack;
 	__aligned_u64 stack_size;
 	__aligned_u64 tls;
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64	/* sizeof first published struct */
-#endif
 	__aligned_u64 set_tid;
 	__aligned_u64 set_tid_size;
-#ifndef CLONE_ARGS_SIZE_VER1
-#define CLONE_ARGS_SIZE_VER1 80	/* sizeof second published struct */
-#endif
 	__aligned_u64 cgroup;
-#ifndef CLONE_ARGS_SIZE_VER2
-#define CLONE_ARGS_SIZE_VER2 88	/* sizeof third published struct */
-#endif
 };
 
 static pid_t sys_clone3(struct __clone_args *args, size_t size)
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 0229e9ebb995..ed785afb6077 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -23,9 +23,7 @@
 #include "../kselftest.h"
 #include "clone3_selftests.h"
 
-#ifndef MAX_PID_NS_LEVEL
 #define MAX_PID_NS_LEVEL 32
-#endif
 
 static int pipe_1[2];
 static int pipe_2[2];
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 749239930ca8..534576f06df1 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -16,34 +16,6 @@
 #include "../kselftest_harness.h"
 #include "../clone3/clone3_selftests.h"
 
-#ifndef __NR_close_range
-	#if defined __alpha__
-		#define __NR_close_range 546
-	#elif defined _MIPS_SIM
-		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
-			#define __NR_close_range (436 + 4000)
-		#endif
-		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
-			#define __NR_close_range (436 + 6000)
-		#endif
-		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
-			#define __NR_close_range (436 + 5000)
-		#endif
-	#elif defined __ia64__
-		#define __NR_close_range (436 + 1024)
-	#else
-		#define __NR_close_range 436
-	#endif
-#endif
-
-#ifndef CLOSE_RANGE_UNSHARE
-#define CLOSE_RANGE_UNSHARE	(1U << 1)
-#endif
-
-#ifndef CLOSE_RANGE_CLOEXEC
-#define CLOSE_RANGE_CLOEXEC	(1U << 2)
-#endif
-
 static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
 				  unsigned int flags)
 {
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
index 902e312bca89..902e312bca89 100644..100755
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
index 4a76e37ef16b..4a76e37ef16b 100644..100755
--- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
+++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
index 87aff8083822..87aff8083822 100644..100755
--- a/tools/testing/selftests/damon/debugfs_empty_targets.sh
+++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
index 922cadac2950..922cadac2950 100644..100755
--- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
+++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
index f3ffeb1343cf..f3ffeb1343cf 100644..100755
--- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
+++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
index 5b39ab44731c..5b39ab44731c 100644..100755
--- a/tools/testing/selftests/damon/debugfs_schemes.sh
+++ b/tools/testing/selftests/damon/debugfs_schemes.sh
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
index 49aeabdb0aae..49aeabdb0aae 100644..100755
--- a/tools/testing/selftests/damon/debugfs_target_ids.sh
+++ b/tools/testing/selftests/damon/debugfs_target_ids.sh
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
index 61b80197c896..61b80197c896 100644..100755
--- a/tools/testing/selftests/damon/lru_sort.sh
+++ b/tools/testing/selftests/damon/lru_sort.sh
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
index 78dbc2334cbe..78dbc2334cbe 100644..100755
--- a/tools/testing/selftests/damon/reclaim.sh
+++ b/tools/testing/selftests/damon/reclaim.sh
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 60a9a305aef0..56f0230a8b92 100644..100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -175,6 +175,7 @@ test_scheme()
 	ensure_dir "$scheme_dir" "exist"
 	ensure_file "$scheme_dir/action" "exist" "600"
 	test_access_pattern "$scheme_dir/access_pattern"
+	ensure_file "$scheme_dir/apply_interval_us" "exist" "600"
 	test_quotas "$scheme_dir/quotas"
 	test_watermarks "$scheme_dir/watermarks"
 	test_filters "$scheme_dir/filters"
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
index ade35576e748..ade35576e748 100644..100755
--- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
diff --git a/tools/testing/selftests/dmabuf-heaps/.gitignore b/tools/testing/selftests/dmabuf-heaps/.gitignore
new file mode 100644
index 000000000000..b500e76b9045
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/.gitignore
@@ -0,0 +1 @@
+dmabuf-heap
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+	RET=0
+
+	local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+	local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+	if [ $bus != "pci" ]; then
+		check_err 1 "devlink device is not a PCI device"
+		log_test "pci reset"
+		return
+	fi
+
+	if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+		check_err 1 "reset is not supported"
+		log_test "pci reset"
+		return
+	fi
+
+	[[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+	check_err $? "only \"bus\" reset method should be supported"
+
+	local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	echo 1 > /sys/bus/pci/devices/$bdf/reset
+	check_err $? "reset failed"
+
+	# Wait for udev to rename newly created netdev.
+	udevadm settle
+
+	local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+	[[ $ifindex_pre != $ifindex_post ]]
+	check_err $? "reset not performed"
+
+	log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/dt/.gitignore b/tools/testing/selftests/dt/.gitignore
new file mode 100644
index 000000000000..f6476c9f2884
--- /dev/null
+++ b/tools/testing/selftests/dt/.gitignore
@@ -0,0 +1 @@
+compatible_list
diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile
new file mode 100644
index 000000000000..62dc00ee4978
--- /dev/null
+++ b/tools/testing/selftests/dt/Makefile
@@ -0,0 +1,21 @@
+PY3 = $(shell which python3 2>/dev/null)
+
+ifneq ($(PY3),)
+
+TEST_PROGS := test_unprobed_devices.sh
+TEST_GEN_FILES := compatible_list
+TEST_FILES := compatible_ignore_list ktap_helpers.sh
+
+include ../lib.mk
+
+$(OUTPUT)/compatible_list:
+	$(top_srcdir)/scripts/dtc/dt-extract-compatibles -d $(top_srcdir) > $@
+
+else
+
+all: no_py3_warning
+
+no_py3_warning:
+	@echo "Missing python3. This test will be skipped."
+
+endif
diff --git a/tools/testing/selftests/dt/compatible_ignore_list b/tools/testing/selftests/dt/compatible_ignore_list
new file mode 100644
index 000000000000..1323903feca9
--- /dev/null
+++ b/tools/testing/selftests/dt/compatible_ignore_list
@@ -0,0 +1 @@
+simple-mfd
diff --git a/tools/testing/selftests/dt/ktap_helpers.sh b/tools/testing/selftests/dt/ktap_helpers.sh
new file mode 100644
index 000000000000..8dfae51bb4e2
--- /dev/null
+++ b/tools/testing/selftests/dt/ktap_helpers.sh
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Helpers for outputting in KTAP format
+#
+KTAP_TESTNO=1
+KTAP_CNT_PASS=0
+KTAP_CNT_FAIL=0
+KTAP_CNT_SKIP=0
+
+ktap_print_header() {
+	echo "TAP version 13"
+}
+
+ktap_set_plan() {
+	num_tests="$1"
+
+	echo "1..$num_tests"
+}
+
+ktap_skip_all() {
+	echo -n "1..0 # SKIP "
+	echo $@
+}
+
+__ktap_test() {
+	result="$1"
+	description="$2"
+	directive="$3" # optional
+
+	local directive_str=
+	[[ ! -z "$directive" ]] && directive_str="# $directive"
+
+	echo $result $KTAP_TESTNO $description $directive_str
+
+	KTAP_TESTNO=$((KTAP_TESTNO+1))
+}
+
+ktap_test_pass() {
+	description="$1"
+
+	result="ok"
+	__ktap_test "$result" "$description"
+
+	KTAP_CNT_PASS=$((KTAP_CNT_PASS+1))
+}
+
+ktap_test_skip() {
+	description="$1"
+
+	result="ok"
+	directive="SKIP"
+	__ktap_test "$result" "$description" "$directive"
+
+	KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
+}
+
+ktap_test_fail() {
+	description="$1"
+
+	result="not ok"
+	__ktap_test "$result" "$description"
+
+	KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1))
+}
+
+ktap_print_totals() {
+	echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+}
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh
new file mode 100755
index 000000000000..b07af2a4c4de
--- /dev/null
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Based on Frank Rowand's dt_stat script.
+#
+# This script tests for devices that were declared on the Devicetree and are
+# expected to bind to a driver, but didn't.
+#
+# To achieve this, two lists are used:
+# * a list of the compatibles that can be matched by a Devicetree node
+# * a list of compatibles that should be ignored
+#
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+source "${DIR}"/ktap_helpers.sh
+
+PDT=/proc/device-tree/
+COMPAT_LIST="${DIR}"/compatible_list
+IGNORE_LIST="${DIR}"/compatible_ignore_list
+
+KSFT_PASS=0
+KSFT_FAIL=1
+KSFT_SKIP=4
+
+ktap_print_header
+
+if [[ ! -d "${PDT}" ]]; then
+	ktap_skip_all "${PDT} doesn't exist."
+	exit "${KSFT_SKIP}"
+fi
+
+nodes_compatible=$(
+	for node_compat in $(find ${PDT} -name compatible); do
+		node=$(dirname "${node_compat}")
+		# Check if node is available
+		if [[ -e "${node}"/status ]]; then
+			status=$(tr -d '\000' < "${node}"/status)
+			[[ "${status}" != "okay" && "${status}" != "ok" ]] && continue
+		fi
+		echo "${node}" | sed -e 's|\/proc\/device-tree||'
+	done | sort
+	)
+
+nodes_dev_bound=$(
+	IFS=$'\n'
+	for uevent in $(find /sys/devices -name uevent); do
+		if [[ -d "$(dirname "${uevent}")"/driver ]]; then
+			grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||'
+		fi
+	done
+	)
+
+num_tests=$(echo ${nodes_compatible} | wc -w)
+ktap_set_plan "${num_tests}"
+
+retval="${KSFT_PASS}"
+for node in ${nodes_compatible}; do
+	if ! echo "${nodes_dev_bound}" | grep -E -q "(^| )${node}( |\$)"; then
+		compatibles=$(tr '\000' '\n' < "${PDT}"/"${node}"/compatible)
+
+		for compatible in ${compatibles}; do
+			if grep -x -q "${compatible}" "${IGNORE_LIST}"; then
+				continue
+			fi
+
+			if grep -x -q "${compatible}" "${COMPAT_LIST}"; then
+				ktap_test_fail "${node}"
+				retval="${KSFT_FAIL}"
+				continue 2
+			fi
+		done
+		ktap_test_skip "${node}"
+	else
+		ktap_test_pass "${node}"
+	fi
+
+done
+
+ktap_print_totals
+exit "${retval}"
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
index 9674a19396a3..7bc7af4eb2c1 100644
--- a/tools/testing/selftests/efivarfs/create-read.c
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -32,8 +32,10 @@ int main(int argc, char **argv)
 	rc = read(fd, buf, sizeof(buf));
 	if (rc != 0) {
 		fprintf(stderr, "Reading a new var should return EOF\n");
+		close(fd);
 		return EXIT_FAILURE;
 	}
 
+	close(fd);
 	return EXIT_SUCCESS;
 }
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67bf7254a48f..bf79d664c8e6 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -23,6 +23,9 @@
 
 #include "../kselftest.h"
 
+#define TESTS_EXPECTED 51
+#define TEST_NAME_LEN (PATH_MAX * 4)
+
 static char longpath[2 * PATH_MAX] = "";
 static char *envp[] = { "IN_TEST=yes", NULL, NULL };
 static char *argv[] = { "execveat", "99", NULL };
@@ -43,71 +46,85 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
 static int _check_execveat_fail(int fd, const char *path, int flags,
 				int expected_errno, const char *errno_str)
 {
+	char test_name[TEST_NAME_LEN];
 	int rc;
 
 	errno = 0;
-	printf("Check failure of execveat(%d, '%s', %d) with %s... ",
-		fd, path?:"(null)", flags, errno_str);
+	snprintf(test_name, sizeof(test_name),
+		 "Check failure of execveat(%d, '%s', %d) with %s",
+		 fd, path?:"(null)", flags, errno_str);
 	rc = execveat_(fd, path, argv, envp, flags);
 
 	if (rc > 0) {
-		printf("[FAIL] (unexpected success from execveat(2))\n");
+		ksft_print_msg("unexpected success from execveat(2)\n");
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
 	if (errno != expected_errno) {
-		printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
-			expected_errno, strerror(expected_errno),
-			errno, strerror(errno));
+		ksft_print_msg("expected errno %d (%s) not %d (%s)\n",
+			       expected_errno, strerror(expected_errno),
+			       errno, strerror(errno));
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
-	printf("[OK]\n");
+	ksft_test_result_pass("%s\n", test_name);
 	return 0;
 }
 
 static int check_execveat_invoked_rc(int fd, const char *path, int flags,
 				     int expected_rc, int expected_rc2)
 {
+	char test_name[TEST_NAME_LEN];
 	int status;
 	int rc;
 	pid_t child;
 	int pathlen = path ? strlen(path) : 0;
 
 	if (pathlen > 40)
-		printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
-			fd, path, (path + pathlen - 20), flags);
+		snprintf(test_name, sizeof(test_name),
+			 "Check success of execveat(%d, '%.20s...%s', %d)... ",
+			 fd, path, (path + pathlen - 20), flags);
 	else
-		printf("Check success of execveat(%d, '%s', %d)... ",
-			fd, path?:"(null)", flags);
+		snprintf(test_name, sizeof(test_name),
+			 "Check success of execveat(%d, '%s', %d)... ",
+			 fd, path?:"(null)", flags);
+
 	child = fork();
 	if (child < 0) {
-		printf("[FAIL] (fork() failed)\n");
+		ksft_perror("fork() failed");
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
 	if (child == 0) {
 		/* Child: do execveat(). */
 		rc = execveat_(fd, path, argv, envp, flags);
-		printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
-			rc, errno, strerror(errno));
+		ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+			       rc, errno, strerror(errno));
+		ksft_test_result_fail("%s\n", test_name);
 		exit(1);  /* should not reach here */
 	}
 	/* Parent: wait for & check child's exit status. */
 	rc = waitpid(child, &status, 0);
 	if (rc != child) {
-		printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+		ksft_print_msg("waitpid(%d,...) returned %d\n", child, rc);
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
 	if (!WIFEXITED(status)) {
-		printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
-			child, status);
+		ksft_print_msg("child %d did not exit cleanly, status=%08x\n",
+			       child, status);
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
 	if ((WEXITSTATUS(status) != expected_rc) &&
 	    (WEXITSTATUS(status) != expected_rc2)) {
-		printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
-			child, WEXITSTATUS(status), expected_rc, expected_rc2);
+		ksft_print_msg("child %d exited with %d not %d nor %d\n",
+			       child, WEXITSTATUS(status), expected_rc,
+			       expected_rc2);
+		ksft_test_result_fail("%s\n", test_name);
 		return 1;
 	}
-	printf("[OK]\n");
+	ksft_test_result_pass("%s\n", test_name);
 	return 0;
 }
 
@@ -129,11 +146,9 @@ static int open_or_die(const char *filename, int flags)
 {
 	int fd = open(filename, flags);
 
-	if (fd < 0) {
-		printf("Failed to open '%s'; "
+	if (fd < 0)
+		ksft_exit_fail_msg("Failed to open '%s'; "
 			"check prerequisites are available\n", filename);
-		exit(1);
-	}
 	return fd;
 }
 
@@ -162,8 +177,7 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
 		char *cwd = getcwd(NULL, 0);
 
 		if (!cwd) {
-			printf("Failed to getcwd(), errno=%d (%s)\n",
-			       errno, strerror(errno));
+			ksft_perror("Failed to getcwd()");
 			return 2;
 		}
 		strcpy(longpath, cwd);
@@ -193,12 +207,12 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
 	 */
 	fd = open(longpath, O_RDONLY);
 	if (fd > 0) {
-		printf("Invoke copy of '%s' via filename of length %zu:\n",
-			src, strlen(longpath));
+		ksft_print_msg("Invoke copy of '%s' via filename of length %zu:\n",
+			       src, strlen(longpath));
 		fail += check_execveat(fd, "", AT_EMPTY_PATH);
 	} else {
-		printf("Failed to open length %zu filename, errno=%d (%s)\n",
-			strlen(longpath), errno, strerror(errno));
+		ksft_print_msg("Failed to open length %zu filename, errno=%d (%s)\n",
+			       strlen(longpath), errno, strerror(errno));
 		fail++;
 	}
 
@@ -405,28 +419,31 @@ int main(int argc, char **argv)
 		const char *in_test = getenv("IN_TEST");
 
 		if (verbose) {
-			printf("  invoked with:");
+			ksft_print_msg("invoked with:\n");
 			for (ii = 0; ii < argc; ii++)
-				printf(" [%d]='%s'", ii, argv[ii]);
-			printf("\n");
+				ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]);
 		}
 
 		/* Check expected environment transferred. */
 		if (!in_test || strcmp(in_test, "yes") != 0) {
-			printf("[FAIL] (no IN_TEST=yes in env)\n");
+			ksft_print_msg("no IN_TEST=yes in env\n");
 			return 1;
 		}
 
 		/* Use the final argument as an exit code. */
 		rc = atoi(argv[argc - 1]);
-		fflush(stdout);
+		exit(rc);
 	} else {
+		ksft_print_header();
+		ksft_set_plan(TESTS_EXPECTED);
 		prerequisites();
 		if (verbose)
 			envp[1] = "VERBOSE=1";
 		rc = run_tests();
 		if (rc > 0)
 			printf("%d tests failed\n", rc);
+		ksft_finished();
 	}
+
 	return rc;
 }
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 4c6f0cd83c5b..04757dc7e546 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -17,10 +17,6 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
-#ifndef CLONE_NEWNS
-# define CLONE_NEWNS 0x00020000
-#endif
-
 static char *fw_path = NULL;
 
 static void die(char *fmt, ...)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
index ff7499eb98d6..e21c9c27ece4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -28,13 +28,21 @@ s390*)
 mips*)
   ARG1=%r4
 ;;
+loongarch*)
+  ARG1=%r4
+;;
+riscv*)
+  ARG1=%a0
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
 esac
 
 : "Test get argument (1)"
-if grep -q eventfs_add_dir available_filter_functions; then
+if grep -q eventfs_create_dir available_filter_functions; then
+  DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
   DIR_NAME="eventfs_add_dir"
 else
   DIR_NAME="tracefs_create_dir"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index a202b2ea4baf..93217d459556 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -31,13 +31,18 @@ mips*)
 loongarch*)
   ARG1=%r4
 ;;
+riscv*)
+  ARG1=%a0
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
 esac
 
 : "Test get argument (1)"
-if grep -q eventfs_add_dir available_filter_functions; then
+if grep -q eventfs_create_dir available_filter_functions; then
+  DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
   DIR_NAME="eventfs_add_dir"
 else
   DIR_NAME="tracefs_create_dir"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 1df61e13a812..8f1292ad80ff 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -44,6 +44,10 @@ loongarch*)
   GOODREG=%r4
   BADREG=%r12
 ;;
+riscv*)
+  GOODREG=%a0
+  BADREG=%a8
+;;
 *)
   echo "Please implement other architecture here"
   exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
new file mode 100644
index 000000000000..bc9514428dba
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test failure of registering kprobe on non unique symbol
+# requires: kprobe_events
+
+SYMBOL='name_show'
+
+# We skip this test on kernel where SYMBOL is unique or does not exist.
+if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then
+	exit_unsupported
+fi
+
+! echo "p:test_non_unique ${SYMBOL}" > kprobe_events
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 2e986cbf1a46..2b5ea18bde38 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -21,7 +21,9 @@ CXX ?= $(CROSS_COMPILE)g++
 
 HOSTPKG_CONFIG := pkg-config
 
-CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(KHDR_INCLUDES) -I$(OUTPUT)
+CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(OUTPUT)
+CFLAGS += -I$(OUTPUT)/tools/include
+
 LDLIBS += -lelf -lz -lrt -lpthread
 
 # Silence some warnings when compiled with clang
@@ -65,7 +67,6 @@ BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
 SCRATCH_DIR := $(OUTPUT)/tools
 BUILD_DIR := $(SCRATCH_DIR)/build
 INCLUDE_DIR := $(SCRATCH_DIR)/include
-KHDR_INCLUDES := $(SCRATCH_DIR)/uapi/include
 BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
 ifneq ($(CROSS_COMPILE),)
 HOST_BUILD_DIR		:= $(BUILD_DIR)/host
@@ -151,9 +152,6 @@ else
 	$(Q)cp "$(VMLINUX_H)" $@
 endif
 
-$(KHDR_INCLUDES)/linux/hid.h: $(top_srcdir)/include/uapi/linux/hid.h
-	$(MAKE) -C $(top_srcdir) INSTALL_HDR_PATH=$(SCRATCH_DIR)/uapi headers_install
-
 $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids	\
 		       $(TOOLSDIR)/bpf/resolve_btfids/main.c	\
 		       $(TOOLSDIR)/lib/rbtree.c			\
@@ -231,7 +229,7 @@ $(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT)
 	$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
 	$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@
 
-$(OUTPUT)/%.o: %.c $(BPF_SKELS) $(KHDR_INCLUDES)/linux/hid.h
+$(OUTPUT)/%.o: %.c $(BPF_SKELS)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
index 88c593f753b5..1e558826b809 100644
--- a/tools/testing/selftests/hid/progs/hid.c
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -1,8 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2022 Red hat */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include "hid_bpf_helpers.h"
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 4fff31dbe0e7..65e657ac1198 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -5,6 +5,83 @@
 #ifndef __HID_BPF_HELPERS_H
 #define __HID_BPF_HELPERS_H
 
+/* "undefine" structs and enums in vmlinux.h, because we "override" them below */
+#define hid_bpf_ctx hid_bpf_ctx___not_used
+#define hid_report_type hid_report_type___not_used
+#define hid_class_request hid_class_request___not_used
+#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used
+#define HID_INPUT_REPORT         HID_INPUT_REPORT___not_used
+#define HID_OUTPUT_REPORT        HID_OUTPUT_REPORT___not_used
+#define HID_FEATURE_REPORT       HID_FEATURE_REPORT___not_used
+#define HID_REPORT_TYPES         HID_REPORT_TYPES___not_used
+#define HID_REQ_GET_REPORT       HID_REQ_GET_REPORT___not_used
+#define HID_REQ_GET_IDLE         HID_REQ_GET_IDLE___not_used
+#define HID_REQ_GET_PROTOCOL     HID_REQ_GET_PROTOCOL___not_used
+#define HID_REQ_SET_REPORT       HID_REQ_SET_REPORT___not_used
+#define HID_REQ_SET_IDLE         HID_REQ_SET_IDLE___not_used
+#define HID_REQ_SET_PROTOCOL     HID_REQ_SET_PROTOCOL___not_used
+#define HID_BPF_FLAG_NONE        HID_BPF_FLAG_NONE___not_used
+#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used
+#define HID_BPF_FLAG_MAX         HID_BPF_FLAG_MAX___not_used
+
+#include "vmlinux.h"
+
+#undef hid_bpf_ctx
+#undef hid_report_type
+#undef hid_class_request
+#undef hid_bpf_attach_flags
+#undef HID_INPUT_REPORT
+#undef HID_OUTPUT_REPORT
+#undef HID_FEATURE_REPORT
+#undef HID_REPORT_TYPES
+#undef HID_REQ_GET_REPORT
+#undef HID_REQ_GET_IDLE
+#undef HID_REQ_GET_PROTOCOL
+#undef HID_REQ_SET_REPORT
+#undef HID_REQ_SET_IDLE
+#undef HID_REQ_SET_PROTOCOL
+#undef HID_BPF_FLAG_NONE
+#undef HID_BPF_FLAG_INSERT_HEAD
+#undef HID_BPF_FLAG_MAX
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/const.h>
+
+enum hid_report_type {
+	HID_INPUT_REPORT		= 0,
+	HID_OUTPUT_REPORT		= 1,
+	HID_FEATURE_REPORT		= 2,
+
+	HID_REPORT_TYPES,
+};
+
+struct hid_bpf_ctx {
+	__u32 index;
+	const struct hid_device *hid;
+	__u32 allocated_size;
+	enum hid_report_type report_type;
+	union {
+		__s32 retval;
+		__s32 size;
+	};
+} __attribute__((preserve_access_index));
+
+enum hid_class_request {
+	HID_REQ_GET_REPORT		= 0x01,
+	HID_REQ_GET_IDLE		= 0x02,
+	HID_REQ_GET_PROTOCOL		= 0x03,
+	HID_REQ_SET_REPORT		= 0x09,
+	HID_REQ_SET_IDLE		= 0x0A,
+	HID_REQ_SET_PROTOCOL		= 0x0B,
+};
+
+enum hid_bpf_attach_flags {
+	HID_BPF_FLAG_NONE = 0,
+	HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
+	HID_BPF_FLAG_MAX,
+};
+
 /* following are kfuncs exported by HID for HID-BPF */
 extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
 			      unsigned int offset,
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 33d08600be13..6ed328c863c4 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -86,12 +86,13 @@ TEST_F(iommufd, cmd_fail)
 
 TEST_F(iommufd, cmd_length)
 {
-#define TEST_LENGTH(_struct, _ioctl)                                     \
+#define TEST_LENGTH(_struct, _ioctl, _last)                              \
 	{                                                                \
+		size_t min_size = offsetofend(struct _struct, _last);    \
 		struct {                                                 \
 			struct _struct cmd;                              \
 			uint8_t extra;                                   \
-		} cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \
+		} cmd = { .cmd = { .size = min_size - 1 },               \
 			  .extra = UINT8_MAX };                          \
 		int old_errno;                                           \
 		int rc;                                                  \
@@ -112,16 +113,19 @@ TEST_F(iommufd, cmd_length)
 		}                                                        \
 	}
 
-	TEST_LENGTH(iommu_destroy, IOMMU_DESTROY);
-	TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO);
-	TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC);
-	TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES);
-	TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS);
-	TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP);
-	TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY);
-	TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP);
-	TEST_LENGTH(iommu_option, IOMMU_OPTION);
-	TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS);
+	TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id);
+	TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved);
+	TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved);
+	TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id);
+	TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES,
+		    out_iova_alignment);
+	TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS,
+		    allowed_iovas);
+	TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova);
+	TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova);
+	TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length);
+	TEST_LENGTH(iommu_option, IOMMU_OPTION, val64);
+	TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved);
 #undef TEST_LENGTH
 }
 
@@ -260,6 +264,121 @@ TEST_F(iommufd_ioas, ioas_destroy)
 	}
 }
 
+TEST_F(iommufd_ioas, alloc_hwpt_nested)
+{
+	const uint32_t min_data_len =
+		offsetofend(struct iommu_hwpt_selftest, iotlb);
+	struct iommu_hwpt_selftest data = {
+		.iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+	};
+	uint32_t nested_hwpt_id[2] = {};
+	uint32_t parent_hwpt_id = 0;
+	uint32_t parent_hwpt_id_not_work = 0;
+	uint32_t test_hwpt_id = 0;
+
+	if (self->device_id) {
+		/* Negative tests */
+		test_err_hwpt_alloc(ENOENT, self->ioas_id, self->device_id, 0,
+				    &test_hwpt_id);
+		test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
+				    &test_hwpt_id);
+
+		test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+				    IOMMU_HWPT_ALLOC_NEST_PARENT,
+				    &parent_hwpt_id);
+
+		test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+				    &parent_hwpt_id_not_work);
+
+		/* Negative nested tests */
+		test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_NONE, &data,
+					   sizeof(data));
+		test_err_hwpt_alloc_nested(EOPNOTSUPP, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST + 1, &data,
+					   sizeof(data));
+		test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   min_data_len - 1);
+		test_err_hwpt_alloc_nested(EFAULT, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST, NULL,
+					   sizeof(data));
+		test_err_hwpt_alloc_nested(
+			EOPNOTSUPP, self->device_id, parent_hwpt_id,
+			IOMMU_HWPT_ALLOC_NEST_PARENT, &nested_hwpt_id[0],
+			IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+		test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+					   parent_hwpt_id_not_work, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+
+		/* Allocate two nested hwpts sharing one common parent hwpt */
+		test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+		test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+					   &nested_hwpt_id[1],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+
+		/* Negative test: a nested hwpt on top of a nested hwpt */
+		test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+					   nested_hwpt_id[0], 0, &test_hwpt_id,
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+		/* Negative test: parent hwpt now cannot be freed */
+		EXPECT_ERRNO(EBUSY,
+			     _test_ioctl_destroy(self->fd, parent_hwpt_id));
+
+		/* Attach device to nested_hwpt_id[0] that then will be busy */
+		test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]);
+		EXPECT_ERRNO(EBUSY,
+			     _test_ioctl_destroy(self->fd, nested_hwpt_id[0]));
+
+		/* Switch from nested_hwpt_id[0] to nested_hwpt_id[1] */
+		test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[1]);
+		EXPECT_ERRNO(EBUSY,
+			     _test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
+		test_ioctl_destroy(nested_hwpt_id[0]);
+
+		/* Detach from nested_hwpt_id[1] and destroy it */
+		test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+		test_ioctl_destroy(nested_hwpt_id[1]);
+
+		/* Detach from the parent hw_pagetable and destroy it */
+		test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+		test_ioctl_destroy(parent_hwpt_id);
+		test_ioctl_destroy(parent_hwpt_id_not_work);
+	} else {
+		test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
+				    &parent_hwpt_id);
+		test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[0],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+		test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+					   parent_hwpt_id, 0,
+					   &nested_hwpt_id[1],
+					   IOMMU_HWPT_DATA_SELFTEST, &data,
+					   sizeof(data));
+		test_err_mock_domain_replace(ENOENT, self->stdev_id,
+					     nested_hwpt_id[0]);
+		test_err_mock_domain_replace(ENOENT, self->stdev_id,
+					     nested_hwpt_id[1]);
+	}
+}
+
 TEST_F(iommufd_ioas, hwpt_attach)
 {
 	/* Create a device attached directly to a hwpt */
@@ -1404,16 +1523,242 @@ TEST_F(iommufd_mock_domain, alloc_hwpt)
 	int i;
 
 	for (i = 0; i != variant->mock_domains; i++) {
+		uint32_t hwpt_id[2];
 		uint32_t stddev_id;
-		uint32_t hwpt_id;
 
-		test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id);
-		test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+		test_err_hwpt_alloc(EOPNOTSUPP,
+				    self->idev_ids[i], self->ioas_id,
+				    ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]);
+		test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+				    0, &hwpt_id[0]);
+		test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+				    IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]);
+
+		/* Do a hw_pagetable rotation test */
+		test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]);
+		EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0]));
+		test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]);
+		EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1]));
+		test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id);
+		test_ioctl_destroy(hwpt_id[1]);
+
+		test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL);
 		test_ioctl_destroy(stddev_id);
-		test_ioctl_destroy(hwpt_id);
+		test_ioctl_destroy(hwpt_id[0]);
 	}
 }
 
+FIXTURE(iommufd_dirty_tracking)
+{
+	int fd;
+	uint32_t ioas_id;
+	uint32_t hwpt_id;
+	uint32_t stdev_id;
+	uint32_t idev_id;
+	unsigned long page_size;
+	unsigned long bitmap_size;
+	void *bitmap;
+	void *buffer;
+};
+
+FIXTURE_VARIANT(iommufd_dirty_tracking)
+{
+	unsigned long buffer_size;
+};
+
+FIXTURE_SETUP(iommufd_dirty_tracking)
+{
+	void *vrc;
+	int rc;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	ASSERT_NE(-1, self->fd);
+
+	rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
+	if (rc || !self->buffer) {
+		SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+			   variant->buffer_size, rc);
+	}
+
+	assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
+	vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+	assert(vrc == self->buffer);
+
+	self->page_size = MOCK_PAGE_SIZE;
+	self->bitmap_size =
+		variant->buffer_size / self->page_size / BITS_PER_BYTE;
+
+	/* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */
+	rc = posix_memalign(&self->bitmap, PAGE_SIZE,
+			    self->bitmap_size + MOCK_PAGE_SIZE);
+	assert(!rc);
+	assert(self->bitmap);
+	assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
+
+	test_ioctl_ioas_alloc(&self->ioas_id);
+	test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
+			     &self->idev_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_dirty_tracking)
+{
+	munmap(self->buffer, variant->buffer_size);
+	munmap(self->bitmap, self->bitmap_size);
+	teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+{
+	/* one u32 index bitmap */
+	.buffer_size = 128UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
+{
+	/* one u64 index bitmap */
+	.buffer_size = 256UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
+{
+	/* two u64 index and trailing end bitmap */
+	.buffer_size = 640UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+{
+	/* 4K bitmap (128M IOVA range) */
+	.buffer_size = 128UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
+{
+	/* 8K bitmap (256M IOVA range) */
+	.buffer_size = 256UL * 1024UL * 1024UL,
+};
+
+TEST_F(iommufd_dirty_tracking, enforce_dirty)
+{
+	uint32_t ioas_id, stddev_id, idev_id;
+	uint32_t hwpt_id, _hwpt_id;
+	uint32_t dev_flags;
+
+	/* Regular case */
+	dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY;
+	test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+			    IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+	test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+	test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id,
+				   NULL);
+	test_ioctl_destroy(stddev_id);
+	test_ioctl_destroy(hwpt_id);
+
+	/* IOMMU device does not support dirty tracking */
+	test_ioctl_ioas_alloc(&ioas_id);
+	test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id,
+				   &idev_id);
+	test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id,
+			    IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+	test_ioctl_destroy(stddev_id);
+}
+
+TEST_F(iommufd_dirty_tracking, set_dirty_tracking)
+{
+	uint32_t stddev_id;
+	uint32_t hwpt_id;
+
+	test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+			    IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+	test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+	test_cmd_set_dirty_tracking(hwpt_id, true);
+	test_cmd_set_dirty_tracking(hwpt_id, false);
+
+	test_ioctl_destroy(stddev_id);
+	test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, device_dirty_capability)
+{
+	uint32_t caps = 0;
+	uint32_t stddev_id;
+	uint32_t hwpt_id;
+
+	test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
+	test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+	test_cmd_get_hw_capabilities(self->idev_id, caps,
+				     IOMMU_HW_CAP_DIRTY_TRACKING);
+	ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
+		  caps & IOMMU_HW_CAP_DIRTY_TRACKING);
+
+	test_ioctl_destroy(stddev_id);
+	test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
+{
+	uint32_t stddev_id;
+	uint32_t hwpt_id;
+	uint32_t ioas_id;
+
+	test_ioctl_ioas_alloc(&ioas_id);
+	test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+				     variant->buffer_size, MOCK_APERTURE_START);
+
+	test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+			    IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+	test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+
+	test_cmd_set_dirty_tracking(hwpt_id, true);
+
+	test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+				MOCK_APERTURE_START, self->page_size,
+				self->bitmap, self->bitmap_size, 0, _metadata);
+
+	/* PAGE_SIZE unaligned bitmap */
+	test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+				MOCK_APERTURE_START, self->page_size,
+				self->bitmap + MOCK_PAGE_SIZE,
+				self->bitmap_size, 0, _metadata);
+
+	test_ioctl_destroy(stddev_id);
+	test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
+{
+	uint32_t stddev_id;
+	uint32_t hwpt_id;
+	uint32_t ioas_id;
+
+	test_ioctl_ioas_alloc(&ioas_id);
+	test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+				     variant->buffer_size, MOCK_APERTURE_START);
+
+	test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+			    IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+	test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+
+	test_cmd_set_dirty_tracking(hwpt_id, true);
+
+	test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+				MOCK_APERTURE_START, self->page_size,
+				self->bitmap, self->bitmap_size,
+				IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+				_metadata);
+
+	/* Unaligned bitmap */
+	test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+				MOCK_APERTURE_START, self->page_size,
+				self->bitmap + MOCK_PAGE_SIZE,
+				self->bitmap_size,
+				IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+				_metadata);
+
+	test_ioctl_destroy(stddev_id);
+	test_ioctl_destroy(hwpt_id);
+}
+
 /* VFIO compatibility IOCTLs */
 
 TEST_F(iommufd, simple_ioctls)
@@ -1729,7 +2074,7 @@ TEST_F(vfio_compat_mock_domain, map)
 	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
 	ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
 
-	/* UNMAP_FLAG_ALL requres 0 iova/size */
+	/* UNMAP_FLAG_ALL requires 0 iova/size */
 	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
 	unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL;
 	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index a220ca2a689d..f590417cd67a 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -105,7 +105,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata,
 
 	/*
 	 * This is just an arbitrary limit based on the current kernel
-	 * situation. Changes in the kernel can dramtically change the number of
+	 * situation. Changes in the kernel can dramatically change the number of
 	 * required fault injection sites, so if this hits it doesn't
 	 * necessarily mean a test failure, just that the limit has to be made
 	 * bigger.
@@ -612,10 +612,11 @@ TEST_FAIL_NTH(basic_fail_nth, device)
 				  &idev_id))
 		return -1;
 
-	if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info)))
+	if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
 		return -1;
 
-	if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id))
+	if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id,
+				 IOMMU_HWPT_DATA_NONE, 0, 0))
 		return -1;
 
 	if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL))
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index e0753d03ecaa..050e9751321c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -16,6 +16,25 @@
 /* Hack to make assertions more readable */
 #define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
 
+/* Imported from include/asm-generic/bitops/generic-non-atomic.h */
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG __BITS_PER_LONG
+#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+
+static inline void set_bit(unsigned int nr, unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p |= mask;
+}
+
+static inline bool test_bit(unsigned int nr, unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1)));
+}
+
 static void *buffer;
 static unsigned long BUFFER_SIZE;
 
@@ -74,6 +93,38 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id,
 	EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \
 						   stdev_id, hwpt_id, NULL))
 
+static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id,
+				       __u32 stdev_flags, __u32 *stdev_id,
+				       __u32 *hwpt_id, __u32 *idev_id)
+{
+	struct iommu_test_cmd cmd = {
+		.size = sizeof(cmd),
+		.op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
+		.id = ioas_id,
+		.mock_domain_flags = { .dev_flags = stdev_flags },
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+	if (ret)
+		return ret;
+	if (stdev_id)
+		*stdev_id = cmd.mock_domain_flags.out_stdev_id;
+	assert(cmd.id != 0);
+	if (hwpt_id)
+		*hwpt_id = cmd.mock_domain_flags.out_hwpt_id;
+	if (idev_id)
+		*idev_id = cmd.mock_domain_flags.out_idev_id;
+	return 0;
+}
+#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \
+	ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags,     \
+						 stdev_id, hwpt_id, idev_id))
+#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \
+	EXPECT_ERRNO(_errno,                                                  \
+		     _test_cmd_mock_domain_flags(self->fd, ioas_id, flags,    \
+						 stdev_id, hwpt_id, NULL))
+
 static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
 					 __u32 *hwpt_id)
 {
@@ -103,12 +154,17 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
 							   pt_id, NULL))
 
 static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
-					 __u32 *hwpt_id)
+				__u32 flags, __u32 *hwpt_id, __u32 data_type,
+				void *data, size_t data_len)
 {
 	struct iommu_hwpt_alloc cmd = {
 		.size = sizeof(cmd),
+		.flags = flags,
 		.dev_id = device_id,
 		.pt_id = pt_id,
+		.data_type = data_type,
+		.data_len = data_len,
+		.data_uptr = (uint64_t)data,
 	};
 	int ret;
 
@@ -120,8 +176,24 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
 	return 0;
 }
 
-#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \
-	ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id))
+#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id)                  \
+	ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags,   \
+					  hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
+					  0))
+#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id)   \
+	EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc(                      \
+				     self->fd, device_id, pt_id, flags, \
+				     hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
+
+#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id,         \
+				   data_type, data, data_len)                \
+	ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+					  hwpt_id, data_type, data, data_len))
+#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
+				   data_type, data, data_len)                \
+	EXPECT_ERRNO(_errno,                                                 \
+		     _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+					  hwpt_id, data_type, data, data_len))
 
 static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
 					 unsigned int ioas_id)
@@ -142,6 +214,126 @@ static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
 #define test_cmd_access_replace_ioas(access_id, ioas_id) \
 	ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id))
 
+static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled)
+{
+	struct iommu_hwpt_set_dirty_tracking cmd = {
+		.size = sizeof(cmd),
+		.flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+		.hwpt_id = hwpt_id,
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd);
+	if (ret)
+		return -errno;
+	return 0;
+}
+#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \
+	ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled))
+
+static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length,
+				      __u64 iova, size_t page_size,
+				      __u64 *bitmap, __u32 flags)
+{
+	struct iommu_hwpt_get_dirty_bitmap cmd = {
+		.size = sizeof(cmd),
+		.hwpt_id = hwpt_id,
+		.flags = flags,
+		.iova = iova,
+		.length = length,
+		.page_size = page_size,
+		.data = (uintptr_t)bitmap,
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size,    \
+				  bitmap, flags)                           \
+	ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \
+						page_size, bitmap, flags))
+
+static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
+					   __u64 iova, size_t page_size,
+					   __u64 *bitmap, __u64 *dirty)
+{
+	struct iommu_test_cmd cmd = {
+		.size = sizeof(cmd),
+		.op = IOMMU_TEST_OP_DIRTY,
+		.id = hwpt_id,
+		.dirty = {
+			.iova = iova,
+			.length = length,
+			.page_size = page_size,
+			.uptr = (uintptr_t)bitmap,
+		}
+	};
+	int ret;
+
+	ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd);
+	if (ret)
+		return -ret;
+	if (dirty)
+		*dirty = cmd.dirty.out_nr_dirty;
+	return 0;
+}
+
+#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \
+				       bitmap, nr)                           \
+	ASSERT_EQ(0,                                                         \
+		  _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \
+						  page_size, bitmap, nr))
+
+static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
+				    __u64 iova, size_t page_size, __u64 *bitmap,
+				    __u64 bitmap_size, __u32 flags,
+				    struct __test_metadata *_metadata)
+{
+	unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE;
+	unsigned long nr = nbits / 2;
+	__u64 out_dirty = 0;
+
+	/* Mark all even bits as dirty in the mock domain */
+	for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
+		if (!(i % 2))
+			set_bit(i, (unsigned long *)bitmap);
+	ASSERT_EQ(nr, count);
+
+	test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
+				       bitmap, &out_dirty);
+	ASSERT_EQ(nr, out_dirty);
+
+	/* Expect all even bits as dirty in the user bitmap */
+	memset(bitmap, 0, bitmap_size);
+	test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+				  flags);
+	for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
+		ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap));
+	ASSERT_EQ(count, out_dirty);
+
+	memset(bitmap, 0, bitmap_size);
+	test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+				  flags);
+
+	/* It as read already -- expect all zeroes */
+	for (i = 0; i < nbits; i++) {
+		ASSERT_EQ(!(i % 2) && (flags &
+				       IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+			  test_bit(i, (unsigned long *)bitmap));
+	}
+
+	return 0;
+}
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap,      \
+				bitmap_size, flags, _metadata)                 \
+	ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
+					      page_size, bitmap, bitmap_size,  \
+					      flags, _metadata))
+
 static int _test_cmd_create_access(int fd, unsigned int ioas_id,
 				   __u32 *access_id, unsigned int flags)
 {
@@ -266,6 +458,17 @@ static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
 					     IOMMU_IOAS_MAP_READABLE));       \
 	})
 
+#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova)           \
+	({                                                                    \
+		__u64 __iova = iova;                                          \
+		ASSERT_EQ(0,                                                  \
+			  _test_ioctl_ioas_map(                               \
+				  self->fd, ioas_id, buffer, length, &__iova, \
+				  IOMMU_IOAS_MAP_FIXED_IOVA |                 \
+					  IOMMU_IOAS_MAP_WRITEABLE |          \
+					  IOMMU_IOAS_MAP_READABLE));          \
+	})
+
 #define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova)           \
 	({                                                                    \
 		__u64 __iova = iova;                                          \
@@ -354,8 +557,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
 #endif
 
 /* @data can be NULL */
-static int _test_cmd_get_hw_info(int fd, __u32 device_id,
-				 void *data, size_t data_len)
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
+				 size_t data_len, uint32_t *capabilities)
 {
 	struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
 	struct iommu_hw_info cmd = {
@@ -363,6 +566,7 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id,
 		.dev_id = device_id,
 		.data_len = data_len,
 		.data_uptr = (uint64_t)data,
+		.out_capabilities = 0,
 	};
 	int ret;
 
@@ -399,14 +603,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id,
 			assert(!info->flags);
 	}
 
+	if (capabilities)
+		*capabilities = cmd.out_capabilities;
+
 	return 0;
 }
 
-#define test_cmd_get_hw_info(device_id, data, data_len)         \
-	ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
-					   data, data_len))
+#define test_cmd_get_hw_info(device_id, data, data_len)               \
+	ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
+					   data_len, NULL))
+
+#define test_err_get_hw_info(_errno, device_id, data, data_len)               \
+	EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
+						   data_len, NULL))
 
-#define test_err_get_hw_info(_errno, device_id, data, data_len) \
-	EXPECT_ERRNO(_errno,                                    \
-		     _test_cmd_get_hw_info(self->fd, device_id, \
-					   data, data_len))
+#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
+	ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 529d29a35900..a781e6311810 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -48,6 +48,7 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <stdarg.h>
+#include <string.h>
 #include <stdio.h>
 #endif
 
@@ -77,6 +78,8 @@
 #define KSFT_XPASS 3
 #define KSFT_SKIP  4
 
+#define __printf(a, b)   __attribute__((format(printf, a, b)))
+
 /* counters */
 struct ksft_count {
 	unsigned int ksft_pass;
@@ -129,7 +132,7 @@ static inline void ksft_print_header(void)
 static inline void ksft_set_plan(unsigned int plan)
 {
 	ksft_plan = plan;
-	printf("1..%d\n", ksft_plan);
+	printf("1..%u\n", ksft_plan);
 }
 
 static inline void ksft_print_cnts(void)
@@ -137,13 +140,13 @@ static inline void ksft_print_cnts(void)
 	if (ksft_plan != ksft_test_num())
 		printf("# Planned tests != run tests (%u != %u)\n",
 			ksft_plan, ksft_test_num());
-	printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n",
+	printf("# Totals: pass:%u fail:%u xfail:%u xpass:%u skip:%u error:%u\n",
 		ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
 		ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
 		ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
 }
 
-static inline void ksft_print_msg(const char *msg, ...)
+static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -155,7 +158,20 @@ static inline void ksft_print_msg(const char *msg, ...)
 	va_end(args);
 }
 
-static inline void ksft_test_result_pass(const char *msg, ...)
+static inline void ksft_perror(const char *msg)
+{
+#ifndef NOLIBC
+	ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+#else
+	/*
+	 * nolibc doesn't provide strerror() and it seems
+	 * inappropriate to add one, just print the errno.
+	 */
+	ksft_print_msg("%s: %d)\n", msg, errno);
+#endif
+}
+
+static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -163,13 +179,13 @@ static inline void ksft_test_result_pass(const char *msg, ...)
 	ksft_cnt.ksft_pass++;
 
 	va_start(args, msg);
-	printf("ok %d ", ksft_test_num());
+	printf("ok %u ", ksft_test_num());
 	errno = saved_errno;
 	vprintf(msg, args);
 	va_end(args);
 }
 
-static inline void ksft_test_result_fail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_fail(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -177,7 +193,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
 	ksft_cnt.ksft_fail++;
 
 	va_start(args, msg);
-	printf("not ok %d ", ksft_test_num());
+	printf("not ok %u ", ksft_test_num());
 	errno = saved_errno;
 	vprintf(msg, args);
 	va_end(args);
@@ -195,7 +211,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
 		ksft_test_result_fail(fmt, ##__VA_ARGS__);\
 	} while (0)
 
-static inline void ksft_test_result_xfail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -203,13 +219,13 @@ static inline void ksft_test_result_xfail(const char *msg, ...)
 	ksft_cnt.ksft_xfail++;
 
 	va_start(args, msg);
-	printf("ok %d # XFAIL ", ksft_test_num());
+	printf("ok %u # XFAIL ", ksft_test_num());
 	errno = saved_errno;
 	vprintf(msg, args);
 	va_end(args);
 }
 
-static inline void ksft_test_result_skip(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -217,14 +233,14 @@ static inline void ksft_test_result_skip(const char *msg, ...)
 	ksft_cnt.ksft_xskip++;
 
 	va_start(args, msg);
-	printf("ok %d # SKIP ", ksft_test_num());
+	printf("ok %u # SKIP ", ksft_test_num());
 	errno = saved_errno;
 	vprintf(msg, args);
 	va_end(args);
 }
 
 /* TODO: how does "error" differ from "fail" or "skip"? */
-static inline void ksft_test_result_error(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -232,7 +248,7 @@ static inline void ksft_test_result_error(const char *msg, ...)
 	ksft_cnt.ksft_error++;
 
 	va_start(args, msg);
-	printf("not ok %d # error ", ksft_test_num());
+	printf("not ok %u # error ", ksft_test_num());
 	errno = saved_errno;
 	vprintf(msg, args);
 	va_end(args);
@@ -271,7 +287,7 @@ static inline int ksft_exit_fail(void)
 		  ksft_cnt.ksft_xfail +	\
 		  ksft_cnt.ksft_xskip)
 
-static inline int ksft_exit_fail_msg(const char *msg, ...)
+static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
@@ -298,7 +314,7 @@ static inline int ksft_exit_xpass(void)
 	exit(KSFT_XPASS);
 }
 
-static inline int ksft_exit_skip(const char *msg, ...)
+static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
 {
 	int saved_errno = errno;
 	va_list args;
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a3bb36fb3cfc..a5963ab9215b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -17,6 +17,15 @@ else
 	ARCH_DIR := $(ARCH)
 endif
 
+ifeq ($(ARCH),arm64)
+arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/
+GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+	$(MAKE) -C $(arm64_tools_dir)
+endif
+
 LIBKVM += lib/assert.c
 LIBKVM += lib/elf.c
 LIBKVM += lib/guest_modes.c
@@ -66,6 +75,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
 TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
+TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
@@ -145,10 +155,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
 TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
 TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
@@ -256,13 +268,18 @@ $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
 $(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
 
-EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.*
+EXTRA_CLEAN += $(GEN_HDRS) \
+	       $(LIBKVM_OBJS) \
+	       $(SPLIT_TESTS_OBJS) \
+	       $(TEST_DEP_FILES) \
+	       $(TEST_GEN_OBJ) \
+	       cscope.*
 
 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
-$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
-$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
 
 # Compile the string overrides as freestanding to prevent the compiler from
@@ -272,8 +289,10 @@ $(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
 
 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TESTS_OBJS): $(GEN_HDRS)
 $(TEST_GEN_PROGS): $(LIBKVM_OBJS)
 $(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
 
 cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
 cscope:
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
index b90580840b22..8e5bd07a3727 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -146,8 +146,8 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
 
 	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
 
-	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val);
-	return el0 == ID_AA64PFR0_ELx_64BIT_ONLY;
+	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+	return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
 }
 
 int main(void)
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index f5b6cb3a0019..866002917441 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
 
 	/* Reset all bcr/bvr/wcr/wvr registers */
 	dfr0 = read_sysreg(id_aa64dfr0_el1);
-	brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0);
+	brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
 	for (i = 0; i <= brps; i++) {
 		write_dbgbcr(i, 0);
 		write_dbgbvr(i, 0);
 	}
-	wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0);
+	wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
 	for (i = 0; i <= wrps; i++) {
 		write_dbgwcr(i, 0);
 		write_dbgwvr(i, 0);
@@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt)
 
 static int debug_version(uint64_t id_aa64dfr0)
 {
-	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
+	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
 }
 
 static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
@@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
 	int b, w, c;
 
 	/* Number of breakpoints */
-	brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
+	brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
 	__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
 
 	/* Number of watchpoints */
-	wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1;
+	wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
 
 	/* Number of context aware breakpoints */
-	ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1;
+	ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
 
 	pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
 		 brp_num, wrp_num, ctx_brp_num);
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 47bb914ab2fa..eb4217b7c768 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -96,14 +96,14 @@ static bool guest_check_lse(void)
 	uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
 	uint64_t atomic;
 
-	atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0);
+	atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
 	return atomic >= 2;
 }
 
 static bool guest_check_dc_zva(void)
 {
 	uint64_t dczid = read_sysreg(dczid_el0);
-	uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid);
+	uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
 
 	return dzp == 0;
 }
@@ -135,8 +135,8 @@ static void guest_at(void)
 	uint64_t par;
 
 	asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
-	par = read_sysreg(par_el1);
 	isb();
+	par = read_sysreg(par_el1);
 
 	/* Bit 1 indicates whether the AT was successful */
 	GUEST_ASSERT_EQ(par & 1, 0);
@@ -196,7 +196,7 @@ static bool guest_set_ha(void)
 	uint64_t hadbs, tcr;
 
 	/* Skip if HA is not supported. */
-	hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1);
+	hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
 	if (hadbs == 0)
 		return false;
 
@@ -842,6 +842,7 @@ static void help(char *name)
 	.name			= SCAT2(ro_memslot_no_syndrome, _access),	\
 	.data_memslot_flags	= KVM_MEM_READONLY,				\
 	.pt_memslot_flags	= KVM_MEM_READONLY,				\
+	.guest_prepare		= { _PREPARE(_access) },			\
 	.guest_test		= _access,					\
 	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
 	.expected_events	= { .fail_vcpu_runs = 1 },			\
@@ -865,6 +866,7 @@ static void help(char *name)
 	.name			= SCAT2(ro_memslot_no_syn_and_dlog, _access),	\
 	.data_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
 	.pt_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
+	.guest_prepare		= { _PREPARE(_access) },			\
 	.guest_test		= _access,					\
 	.guest_test_check	= { _test_check },				\
 	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
@@ -894,6 +896,7 @@ static void help(char *name)
 	.data_memslot_flags	= KVM_MEM_READONLY,				\
 	.pt_memslot_flags	= KVM_MEM_READONLY,				\
 	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
+	.guest_prepare		= { _PREPARE(_access) },			\
 	.guest_test		= _access,					\
 	.uffd_data_handler	= _uffd_data_handler,				\
 	.uffd_pt_handler	= uffd_pt_handler,			\
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
new file mode 100644
index 000000000000..bac05210b539
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID register from usersapce.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+	FTR_EXACT,			/* Use a predefined safe value */
+	FTR_LOWER_SAFE,			/* Smaller value is safe */
+	FTR_HIGHER_SAFE,		/* Bigger value is safe */
+	FTR_HIGHER_OR_ZERO_SAFE,	/* Bigger value is safe, but 0 is biggest */
+	FTR_END,			/* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED	true	/* Value should be treated as signed */
+#define FTR_UNSIGNED	false	/* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+	char *name;
+	bool sign;
+	enum ftr_type type;
+	uint8_t shift;
+	uint64_t mask;
+	int64_t safe_val;
+};
+
+struct test_feature_reg {
+	uint32_t reg;
+	const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL)	\
+	{								\
+		.name = #NAME,						\
+		.sign = SIGNED,						\
+		.type = TYPE,						\
+		.shift = SHIFT,						\
+		.mask = MASK,						\
+		.safe_val = SAFE_VAL,					\
+	}
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+	__REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+		       reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+	__REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+		       reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END					\
+	{						\
+		.type = FTR_END,			\
+	}
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+	REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+	REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+	REG_FTR_END,
+};
+
+#define TEST_REG(id, table)			\
+	{					\
+		.reg = id,			\
+		.ftr_bits = &((table)[0]),	\
+	}
+
+static struct test_feature_reg test_regs[] = {
+	TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+	TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+	TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+	TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+	TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+	TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+	TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+	TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+	TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+	TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+	GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+	GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+
+	GUEST_DONE();
+}
+
+/* Return a safe value to a given ftr_bits an ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+	uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+	if (ftr_bits->type == FTR_UNSIGNED) {
+		switch (ftr_bits->type) {
+		case FTR_EXACT:
+			ftr = ftr_bits->safe_val;
+			break;
+		case FTR_LOWER_SAFE:
+			if (ftr > 0)
+				ftr--;
+			break;
+		case FTR_HIGHER_SAFE:
+			if (ftr < ftr_max)
+				ftr++;
+			break;
+		case FTR_HIGHER_OR_ZERO_SAFE:
+			if (ftr == ftr_max)
+				ftr = 0;
+			else if (ftr != 0)
+				ftr++;
+			break;
+		default:
+			break;
+		}
+	} else if (ftr != ftr_max) {
+		switch (ftr_bits->type) {
+		case FTR_EXACT:
+			ftr = ftr_bits->safe_val;
+			break;
+		case FTR_LOWER_SAFE:
+			if (ftr > 0)
+				ftr--;
+			break;
+		case FTR_HIGHER_SAFE:
+			if (ftr < ftr_max - 1)
+				ftr++;
+			break;
+		case FTR_HIGHER_OR_ZERO_SAFE:
+			if (ftr != 0 && ftr != ftr_max - 1)
+				ftr++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return ftr;
+}
+
+/* Return an invalid value to a given ftr_bits an ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+	uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+	if (ftr_bits->type == FTR_UNSIGNED) {
+		switch (ftr_bits->type) {
+		case FTR_EXACT:
+			ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+			break;
+		case FTR_LOWER_SAFE:
+			ftr++;
+			break;
+		case FTR_HIGHER_SAFE:
+			ftr--;
+			break;
+		case FTR_HIGHER_OR_ZERO_SAFE:
+			if (ftr == 0)
+				ftr = ftr_max;
+			else
+				ftr--;
+			break;
+		default:
+			break;
+		}
+	} else if (ftr != ftr_max) {
+		switch (ftr_bits->type) {
+		case FTR_EXACT:
+			ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+			break;
+		case FTR_LOWER_SAFE:
+			ftr++;
+			break;
+		case FTR_HIGHER_SAFE:
+			ftr--;
+			break;
+		case FTR_HIGHER_OR_ZERO_SAFE:
+			if (ftr == 0)
+				ftr = ftr_max - 1;
+			else
+				ftr--;
+			break;
+		default:
+			break;
+		}
+	} else {
+		ftr = 0;
+	}
+
+	return ftr;
+}
+
+static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+				 const struct reg_ftr_bits *ftr_bits)
+{
+	uint8_t shift = ftr_bits->shift;
+	uint64_t mask = ftr_bits->mask;
+	uint64_t val, new_val, ftr;
+
+	vcpu_get_reg(vcpu, reg, &val);
+	ftr = (val & mask) >> shift;
+
+	ftr = get_safe_value(ftr_bits, ftr);
+
+	ftr <<= shift;
+	val &= ~mask;
+	val |= ftr;
+
+	vcpu_set_reg(vcpu, reg, val);
+	vcpu_get_reg(vcpu, reg, &new_val);
+	TEST_ASSERT_EQ(new_val, val);
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+			      const struct reg_ftr_bits *ftr_bits)
+{
+	uint8_t shift = ftr_bits->shift;
+	uint64_t mask = ftr_bits->mask;
+	uint64_t val, old_val, ftr;
+	int r;
+
+	vcpu_get_reg(vcpu, reg, &val);
+	ftr = (val & mask) >> shift;
+
+	ftr = get_invalid_value(ftr_bits, ftr);
+
+	old_val = val;
+	ftr <<= shift;
+	val &= ~mask;
+	val |= ftr;
+
+	r = __vcpu_set_reg(vcpu, reg, val);
+	TEST_ASSERT(r < 0 && errno == EINVAL,
+		    "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+	vcpu_get_reg(vcpu, reg, &val);
+	TEST_ASSERT_EQ(val, old_val);
+}
+
+static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+	uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+	struct reg_mask_range range = {
+		.addr = (__u64)masks,
+	};
+	int ret;
+
+	/* KVM should return error when reserved field is not zero */
+	range.reserved[0] = 1;
+	ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+	TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+	/* Get writable masks for feature ID registers */
+	memset(range.reserved, 0, sizeof(range.reserved));
+	vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+	for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+		const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+		uint32_t reg_id = test_regs[i].reg;
+		uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+		int idx;
+
+		/* Get the index to masks array for the idreg */
+		idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id),
+						   sys_reg_CRn(reg_id), sys_reg_CRm(reg_id),
+						   sys_reg_Op2(reg_id));
+
+		for (int j = 0;  ftr_bits[j].type != FTR_END; j++) {
+			/* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
+			if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+				ksft_test_result_skip("%s on AARCH64 only system\n",
+						      ftr_bits[j].name);
+				continue;
+			}
+
+			/* Make sure the feature field is writable */
+			TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+			test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+			test_reg_set_success(vcpu, reg, &ftr_bits[j]);
+
+			ksft_test_result_pass("%s\n", ftr_bits[j].name);
+		}
+	}
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+	bool done = false;
+	struct ucall uc;
+	uint64_t val;
+
+	while (!done) {
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			break;
+		case UCALL_SYNC:
+			/* Make sure the written values are seen by guest */
+			vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val);
+			TEST_ASSERT_EQ(val, uc.args[3]);
+			break;
+		case UCALL_DONE:
+			done = true;
+			break;
+		default:
+			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+		}
+	}
+}
+
+int main(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	bool aarch64_only;
+	uint64_t val, el0;
+	int ftr_cnt;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* Check for AARCH64 only system */
+	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+	aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+	ksft_print_header();
+
+	ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+		  ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+		  ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+		  ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
+		  ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+		  ARRAY_SIZE(test_regs);
+
+	ksft_set_plan(ftr_cnt);
+
+	test_user_set_reg(vcpu, aarch64_only);
+	test_guest_reg_read(vcpu);
+
+	kvm_vm_free(vm);
+
+	ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
new file mode 100644
index 000000000000..5ea78986e665
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of the PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if the userspace accesses to the PMU regsisters honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS	(ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX		31
+
+struct vpmu_vm {
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+	uint64_t set_reg_id;
+	uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+	return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+	*pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
+	*pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+}
+
+static uint64_t get_counters_mask(uint64_t n)
+{
+	uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+	if (n)
+		mask |= GENMASK(n - 1, 0);
+	return mask;
+}
+
+/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+	write_sysreg(sel, pmselr_el0);
+	isb();
+	return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+	write_sysreg(sel, pmselr_el0);
+	isb();
+	write_sysreg(val, pmxevcntr_el0);
+	isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+	write_sysreg(sel, pmselr_el0);
+	isb();
+	return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+	write_sysreg(sel, pmselr_el0);
+	isb();
+	write_sysreg(val, pmxevtyper_el0);
+	isb();
+}
+
+static inline void enable_counter(int idx)
+{
+	uint64_t v = read_sysreg(pmcntenset_el0);
+
+	write_sysreg(BIT(idx) | v, pmcntenset_el0);
+	isb();
+}
+
+static inline void disable_counter(int idx)
+{
+	uint64_t v = read_sysreg(pmcntenset_el0);
+
+	write_sysreg(BIT(idx) | v, pmcntenclr_el0);
+	isb();
+}
+
+static void pmu_disable_reset(void)
+{
+	uint64_t pmcr = read_sysreg(pmcr_el0);
+
+	/* Reset all counters, disabling them */
+	pmcr &= ~ARMV8_PMU_PMCR_E;
+	write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+	isb();
+}
+
+#define RETURN_READ_PMEVCNTRN(n) \
+	return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+	PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+	return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+	write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+	PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+	isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+	return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+	PMEVN_SWITCH(n, READ_PMEVTYPERN);
+	return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+	write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+	PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+	isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each of the accessors will
+ * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
+ * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
+ * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+	/* A function to be used to read PMEVTCNTR<n>_EL0 */
+	unsigned long	(*read_cntr)(int idx);
+	/* A function to be used to write PMEVTCNTR<n>_EL0 */
+	void		(*write_cntr)(int idx, unsigned long val);
+	/* A function to be used to read PMEVTYPER<n>_EL0 */
+	unsigned long	(*read_typer)(int idx);
+	/* A function to be used to write PMEVTYPER<n>_EL0 */
+	void		(*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+	/* test with all direct accesses */
+	{ read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+	/* test with all indirect accesses */
+	{ read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+	/* read with direct accesses, and write with indirect accesses */
+	{ read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+	/* read with indirect accesses, and write with direct accesses */
+	{ read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc)	(acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected)			 \
+{										 \
+	uint64_t _tval = read_sysreg(regname);					 \
+										 \
+	if (set_expected)							 \
+		__GUEST_ASSERT((_tval & mask),					 \
+				"tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+				_tval, mask, set_expected);			 \
+	else									 \
+		__GUEST_ASSERT(!(_tval & mask),					 \
+				"tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+				_tval, mask, set_expected);			 \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+	GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+	GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+	GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+	GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+	GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+	GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+	uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+	bool set_expected = false;
+
+	if (set_op) {
+		write_sysreg(test_bit, pmcntenset_el0);
+		write_sysreg(test_bit, pmintenset_el1);
+		write_sysreg(test_bit, pmovsset_el0);
+
+		/* The bit will be set only if the counter is implemented */
+		pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+		set_expected = (pmc_idx < pmcr_n) ? true : false;
+	} else {
+		write_sysreg(test_bit, pmcntenclr_el0);
+		write_sysreg(test_bit, pmintenclr_el1);
+		write_sysreg(test_bit, pmovsclr_el0);
+	}
+	check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+	uint64_t write_data, read_data;
+
+	/* Disable all PMCs and reset all PMCs to zero. */
+	pmu_disable_reset();
+
+	/*
+	 * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+	 */
+
+	/* Make sure that the bit in those registers are set to 0 */
+	test_bitmap_pmu_regs(pmc_idx, false);
+	/* Test if setting the bit in those registers works */
+	test_bitmap_pmu_regs(pmc_idx, true);
+	/* Test if clearing the bit in those registers works */
+	test_bitmap_pmu_regs(pmc_idx, false);
+
+	/*
+	 * Tests for reading/writing the event type register.
+	 */
+
+	/*
+	 * Set the event type register to an arbitrary value just for testing
+	 * of reading/writing the register.
+	 * Arm ARM says that for the event from 0x0000 to 0x003F,
+	 * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is
+	 * the value written to the field even when the specified event
+	 * is not supported.
+	 */
+	write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+	acc->write_typer(pmc_idx, write_data);
+	read_data = acc->read_typer(pmc_idx);
+	__GUEST_ASSERT(read_data == write_data,
+		       "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+		       pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+	/*
+	 * Tests for reading/writing the event count register.
+	 */
+
+	read_data = acc->read_cntr(pmc_idx);
+
+	/* The count value must be 0, as it is disabled and reset */
+	__GUEST_ASSERT(read_data == 0,
+		       "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx",
+		       pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+	write_data = read_data + pmc_idx + 0x12345;
+	acc->write_cntr(pmc_idx, write_data);
+	read_data = acc->read_cntr(pmc_idx);
+	__GUEST_ASSERT(read_data == write_data,
+		       "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+		       pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC	(-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+	uint64_t esr, ec;
+
+	esr = read_sysreg(esr_el1);
+	ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK;
+
+	__GUEST_ASSERT(expected_ec == ec,
+			"PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+			regs->pc, esr, ec, expected_ec);
+
+	/* skip the trapping instruction */
+	regs->pc += 4;
+
+	/* Use INVALID_EC to indicate an exception occurred */
+	expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip
+ * the instruction that trapped.
+ */
+#define TEST_EXCEPTION(ec, ops)				\
+({							\
+	GUEST_ASSERT(ec != INVALID_EC);			\
+	WRITE_ONCE(expected_ec, ec);			\
+	dsb(ish);					\
+	ops;						\
+	GUEST_ASSERT(expected_ec == INVALID_EC);	\
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+	/*
+	 * Reading/writing the event count/type registers should cause
+	 * an UNDEFINED exception.
+	 */
+	TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+	TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+	TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx));
+	TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+	/*
+	 * The bit corresponding to the (unimplemented) counter in
+	 * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+	 */
+	test_bitmap_pmu_regs(pmc_idx, 1);
+	test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+	uint64_t pmcr, pmcr_n, unimp_mask;
+	int i, pmc;
+
+	__GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+			"Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%lx",
+			expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+	pmcr = read_sysreg(pmcr_el0);
+	pmcr_n = get_pmcr_n(pmcr);
+
+	/* Make sure that PMCR_EL0.N indicates the value userspace set */
+	__GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+			"Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+			expected_pmcr_n, pmcr_n);
+
+	/*
+	 * Make sure that (RAZ) bits corresponding to unimplemented event
+	 * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+	 * to zero.
+	 * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+	 */
+	unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+	check_bitmap_pmu_regs(unimp_mask, false);
+
+	/*
+	 * Tests for reading/writing PMU registers for implemented counters.
+	 * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+		for (pmc = 0; pmc < pmcr_n; pmc++)
+			test_access_pmc_regs(&pmc_accessors[i], pmc);
+	}
+
+	/*
+	 * Tests for reading/writing PMU registers for unimplemented counters.
+	 * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+		for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+			test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+	}
+
+	GUEST_DONE();
+}
+
+#define GICD_BASE_GPA	0x8000000ULL
+#define GICR_BASE_GPA	0x80A0000ULL
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+	struct kvm_vcpu_init init;
+	uint8_t pmuver, ec;
+	uint64_t dfr0, irq = 23;
+	struct kvm_device_attr irq_attr = {
+		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
+		.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+		.addr = (uint64_t)&irq,
+	};
+	struct kvm_device_attr init_attr = {
+		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
+		.attr = KVM_ARM_VCPU_PMU_V3_INIT,
+	};
+
+	/* The test creates the vpmu_vm multiple times. Ensure a clean state */
+	memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+	vpmu_vm.vm = vm_create(1);
+	vm_init_descriptor_tables(vpmu_vm.vm);
+	for (ec = 0; ec < ESR_EC_NUM; ec++) {
+		vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+					guest_sync_handler);
+	}
+
+	/* Create vCPU with PMUv3 */
+	vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+	init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+	vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+	vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+	vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64,
+					GICD_BASE_GPA, GICR_BASE_GPA);
+	__TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+		       "Failed to create vgic-v3, skipping");
+
+	/* Make sure that PMUv3 support is indicated in the ID register */
+	vcpu_get_reg(vpmu_vm.vcpu,
+		     KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0);
+	pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+	TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+		    pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+		    "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+	/* Initialize vPMU */
+	vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+	vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+	close(vpmu_vm.gic_fd);
+	kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+	struct ucall uc;
+
+	vcpu_args_set(vcpu, 1, pmcr_n);
+	vcpu_run(vcpu);
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		break;
+	case UCALL_DONE:
+		break;
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		break;
+	}
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+	struct kvm_vcpu *vcpu;
+	uint64_t pmcr, pmcr_orig;
+
+	create_vpmu_vm(guest_code);
+	vcpu = vpmu_vm.vcpu;
+
+	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig);
+	pmcr = pmcr_orig;
+
+	/*
+	 * Setting a larger value of PMCR.N should not modify the field, and
+	 * return a success.
+	 */
+	set_pmcr_n(&pmcr, pmcr_n);
+	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+
+	if (expect_fail)
+		TEST_ASSERT(pmcr_orig == pmcr,
+			    "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n",
+			    pmcr, pmcr_n);
+	else
+		TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+			    "Failed to update PMCR.N to %lu (received: %lu)\n",
+			    pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+	uint64_t sp;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_init init;
+
+	pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+	test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+	vcpu = vpmu_vm.vcpu;
+
+	/* Save the initial sp to restore them later to run the guest again */
+	vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp);
+
+	run_vcpu(vcpu, pmcr_n);
+
+	/*
+	 * Reset and re-initialize the vCPU, and run the guest code again to
+	 * check if PMCR_EL0.N is preserved.
+	 */
+	vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+	init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+	aarch64_vcpu_setup(vcpu, &init);
+	vcpu_init_descriptor_tables(vcpu);
+	vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+	vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+	run_vcpu(vcpu, pmcr_n);
+
+	destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+	PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+	PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+	PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+	int i;
+	struct kvm_vcpu *vcpu;
+	uint64_t set_reg_id, clr_reg_id, reg_val;
+	uint64_t valid_counters_mask, max_counters_mask;
+
+	test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+	vcpu = vpmu_vm.vcpu;
+
+	valid_counters_mask = get_counters_mask(pmcr_n);
+	max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+	for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+		set_reg_id = validity_check_reg_sets[i].set_reg_id;
+		clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+		/*
+		 * Test if the 'set' and 'clr' variants of the registers
+		 * are initialized based on the number of valid counters.
+		 */
+		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+			    "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+			    KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+			    "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+			    KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+		/*
+		 * Using the 'set' variant, force-set the register to the
+		 * max number of possible counters and test if KVM discards
+		 * the bits for unimplemented counters as it should.
+		 */
+		vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+			    "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+			    KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+			    "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+			    KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+	}
+
+	destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+	pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+	test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+	destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+	uint64_t pmcr;
+
+	create_vpmu_vm(guest_code);
+	vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+	destroy_vpmu_vm();
+	return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+	uint64_t i, pmcr_n;
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+	pmcr_n = get_pmcr_n_limit();
+	for (i = 0; i <= pmcr_n; i++) {
+		run_access_test(i);
+		run_pmregs_validity_test(i);
+	}
+
+	for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+		run_error_test(i);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index cb537253a6b9..c42d683102c7 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -104,6 +104,7 @@ enum {
 #define ESR_EC_SHIFT		26
 #define ESR_EC_MASK		(ESR_EC_NUM - 1)
 
+#define ESR_EC_UNKNOWN		0x0
 #define ESR_EC_SVC64		0x15
 #define ESR_EC_IABT		0x21
 #define ESR_EC_DABT		0x25
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 7e614adc6cf4..8e5f413a593d 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -33,7 +33,7 @@ static inline int _no_printf(const char *format, ...) { return 0; }
 #define pr_info(...) _no_printf(__VA_ARGS__)
 #endif
 
-void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+void __printf(1, 2) print_skip(const char *fmt, ...);
 #define __TEST_REQUIRE(f, fmt, ...)				\
 do {								\
 	if (!(f))						\
@@ -46,9 +46,9 @@ ssize_t test_write(int fd, const void *buf, size_t count);
 ssize_t test_read(int fd, void *buf, size_t count);
 int test_seq_read(const char *path, char **bufp, size_t *sizep);
 
-void test_assert(bool exp, const char *exp_str,
-		 const char *file, unsigned int line, const char *fmt, ...)
-		__attribute__((format(printf, 5, 6)));
+void __printf(5, 6) test_assert(bool exp, const char *exp_str,
+				const char *file, unsigned int line,
+				const char *fmt, ...);
 
 #define TEST_ASSERT(e, fmt, ...) \
 	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 112bc1da732a..ce33d306c2cb 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
  * Copyright (C) 2018, Google LLC.
  */
 #ifndef SELFTEST_KVM_UCALL_COMMON_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4fd042112526..25bc61dac5fb 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -68,6 +68,12 @@ struct xstate {
 #define XFEATURE_MASK_OPMASK		BIT_ULL(5)
 #define XFEATURE_MASK_ZMM_Hi256		BIT_ULL(6)
 #define XFEATURE_MASK_Hi16_ZMM		BIT_ULL(7)
+#define XFEATURE_MASK_PT		BIT_ULL(8)
+#define XFEATURE_MASK_PKRU		BIT_ULL(9)
+#define XFEATURE_MASK_PASID		BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER		BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL	BIT_ULL(12)
+#define XFEATURE_MASK_LBR		BIT_ULL(15)
 #define XFEATURE_MASK_XTILE_CFG		BIT_ULL(17)
 #define XFEATURE_MASK_XTILE_DATA	BIT_ULL(18)
 
@@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature {
 #define	X86_FEATURE_CLWB		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
 #define	X86_FEATURE_UMIP		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
 #define	X86_FEATURE_PKU			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define	X86_FEATURE_OSPKE		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
 #define	X86_FEATURE_LA57		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
 #define	X86_FEATURE_RDPID		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
 #define	X86_FEATURE_SGX_LC		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
@@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value)
 	__asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
+static inline void wrpkru(u32 pkru)
+{
+	/* Note, ECX and EDX are architecturally required to be '0'. */
+	asm volatile(".byte 0x0f,0x01,0xef\n\t"
+		     : : "a" (pkru), "c"(0), "d"(0));
+}
+
 static inline struct desc_ptr get_gdt(void)
 {
 	struct desc_ptr gdt;
@@ -908,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
 	       !kvm_cpu_has(feature.anti_feature);
 }
 
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+	if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+		return 0;
+
+	return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+	       ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
 static inline size_t kvm_cpuid2_size(int nr_entries)
 {
 	return sizeof(struct kvm_cpuid2) +
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 3a0259e25335..6fe12e985ba5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -518,9 +518,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
 	err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 	TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
 
-	*ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf;
-	*ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0;
-	*ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0;
+	*ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf;
+	*ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0;
+	*ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0;
 
 	close(vcpu_fd);
 	close(vm_fd);
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
index c4a69d8aeb68..74627514c4d4 100644
--- a/tools/testing/selftests/kvm/lib/guest_sprintf.c
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -200,6 +200,13 @@ repeat:
 			++fmt;
 		}
 
+		/*
+		 * Play nice with %llu, %llx, etc.  KVM selftests only support
+		 * 64-bit builds, so just treat %ll* the same as %l*.
+		 */
+		if (qualifier == 'l' && *fmt == 'l')
+			++fmt;
+
 		/* default base */
 		base = 10;
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
index 7168e25c194e..89153a333e83 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
  * Copyright (C) 2021, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 20eb2e730800..8698d1ab60d0 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data,
 		      struct test_result *rbestruntime)
 {
 	uint64_t maxslots;
-	struct test_result result;
+	struct test_result result = {};
 
-	result.nloops = 0;
 	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
 			  &result.nloops,
 			  &result.slot_runtime, &result.guest_runtime)) {
@@ -1089,7 +1088,7 @@ int main(int argc, char *argv[])
 		.seconds = 5,
 		.runs = 1,
 	};
-	struct test_result rbestslottime;
+	struct test_result rbestslottime = {};
 	int tctr;
 
 	if (!check_memory_sizes())
@@ -1098,11 +1097,10 @@ int main(int argc, char *argv[])
 	if (!parse_args(argc, argv, &targs))
 		return -1;
 
-	rbestslottime.slottimens = 0;
 	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
 		const struct test_data *data = &tests[tctr];
 		unsigned int runctr;
-		struct test_result rbestruntime;
+		struct test_result rbestruntime = {};
 
 		if (tctr > targs.tfirst)
 			pr_info("\n");
@@ -1110,7 +1108,6 @@ int main(int argc, char *argv[])
 		pr_info("Testing %s performance with %i runs, %d seconds each\n",
 			data->name, targs.runs, targs.seconds);
 
-		rbestruntime.runtimens = 0;
 		for (runctr = 0; runctr < targs.runs; runctr++)
 			if (!test_loop(data, &targs,
 				       &rbestslottime, &rbestruntime))
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 9f99ea42f45f..6bedaea95395 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -25,6 +25,8 @@ bool filter_reg(__u64 reg)
 	 * the visibility of the ISA_EXT register itself.
 	 *
 	 * Based on above, we should filter-out all ISA_EXT registers.
+	 *
+	 * Note: The below list is alphabetically sorted.
 	 */
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C:
@@ -33,21 +35,23 @@ bool filter_reg(__u64 reg)
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
-	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
+	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
 	case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
 		return true;
 	/* AIA registers are always available when Ssaia can't be disabled */
@@ -112,11 +116,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
 	}
 }
 
-static const char *config_id_to_str(__u64 id)
+static const char *config_id_to_str(const char *prefix, __u64 id)
 {
 	/* reg_off is the offset into struct kvm_riscv_config */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG);
+
 	switch (reg_off) {
 	case KVM_REG_RISCV_CONFIG_REG(isa):
 		return "KVM_REG_RISCV_CONFIG_REG(isa)";
@@ -134,11 +140,7 @@ static const char *config_id_to_str(__u64 id)
 		return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
 	}
 
-	/*
-	 * Config regs would grow regularly with new pseudo reg added, so
-	 * just show raw id to indicate a new pseudo config reg.
-	 */
-	return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off);
+	return strdup_printf("%lld /* UNKNOWN */", reg_off);
 }
 
 static const char *core_id_to_str(const char *prefix, __u64 id)
@@ -146,6 +148,8 @@ static const char *core_id_to_str(const char *prefix, __u64 id)
 	/* reg_off is the offset into struct kvm_riscv_core */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE);
+
 	switch (reg_off) {
 	case KVM_REG_RISCV_CORE_REG(regs.pc):
 		return "KVM_REG_RISCV_CORE_REG(regs.pc)";
@@ -176,14 +180,15 @@ static const char *core_id_to_str(const char *prefix, __u64 id)
 		return "KVM_REG_RISCV_CORE_REG(mode)";
 	}
 
-	TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
-	return NULL;
+	return strdup_printf("%lld /* UNKNOWN */", reg_off);
 }
 
 #define RISCV_CSR_GENERAL(csr) \
 	"KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
 #define RISCV_CSR_AIA(csr) \
 	"KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_SMSTATEEN(csr) \
+	"KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")"
 
 static const char *general_csr_id_to_str(__u64 reg_off)
 {
@@ -209,10 +214,11 @@ static const char *general_csr_id_to_str(__u64 reg_off)
 		return RISCV_CSR_GENERAL(satp);
 	case KVM_REG_RISCV_CSR_REG(scounteren):
 		return RISCV_CSR_GENERAL(scounteren);
+	case KVM_REG_RISCV_CSR_REG(senvcfg):
+		return RISCV_CSR_GENERAL(senvcfg);
 	}
 
-	TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off);
-	return NULL;
+	return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off);
 }
 
 static const char *aia_csr_id_to_str(__u64 reg_off)
@@ -235,7 +241,18 @@ static const char *aia_csr_id_to_str(__u64 reg_off)
 		return RISCV_CSR_AIA(iprio2h);
 	}
 
-	TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off);
+	return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *smstateen_csr_id_to_str(__u64 reg_off)
+{
+	/* reg_off is the offset into struct kvm_riscv_smstateen_csr */
+	switch (reg_off) {
+	case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0):
+		return RISCV_CSR_SMSTATEEN(sstateen0);
+	}
+
+	TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off);
 	return NULL;
 }
 
@@ -244,6 +261,8 @@ static const char *csr_id_to_str(const char *prefix, __u64 id)
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
 	__u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR);
+
 	reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
 
 	switch (reg_subtype) {
@@ -251,10 +270,11 @@ static const char *csr_id_to_str(const char *prefix, __u64 id)
 		return general_csr_id_to_str(reg_off);
 	case KVM_REG_RISCV_CSR_AIA:
 		return aia_csr_id_to_str(reg_off);
+	case KVM_REG_RISCV_CSR_SMSTATEEN:
+		return smstateen_csr_id_to_str(reg_off);
 	}
 
-	TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype);
-	return NULL;
+	return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
 }
 
 static const char *timer_id_to_str(const char *prefix, __u64 id)
@@ -262,6 +282,8 @@ static const char *timer_id_to_str(const char *prefix, __u64 id)
 	/* reg_off is the offset into struct kvm_riscv_timer */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER);
+
 	switch (reg_off) {
 	case KVM_REG_RISCV_TIMER_REG(frequency):
 		return "KVM_REG_RISCV_TIMER_REG(frequency)";
@@ -273,8 +295,7 @@ static const char *timer_id_to_str(const char *prefix, __u64 id)
 		return "KVM_REG_RISCV_TIMER_REG(state)";
 	}
 
-	TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id);
-	return NULL;
+	return strdup_printf("%lld /* UNKNOWN */", reg_off);
 }
 
 static const char *fp_f_id_to_str(const char *prefix, __u64 id)
@@ -282,6 +303,8 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id)
 	/* reg_off is the offset into struct __riscv_f_ext_state */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F);
+
 	switch (reg_off) {
 	case KVM_REG_RISCV_FP_F_REG(f[0]) ...
 	     KVM_REG_RISCV_FP_F_REG(f[31]):
@@ -290,8 +313,7 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id)
 		return "KVM_REG_RISCV_FP_F_REG(fcsr)";
 	}
 
-	TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id);
-	return NULL;
+	return strdup_printf("%lld /* UNKNOWN */", reg_off);
 }
 
 static const char *fp_d_id_to_str(const char *prefix, __u64 id)
@@ -299,6 +321,8 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
 	/* reg_off is the offset into struct __riscv_d_ext_state */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D);
+
 	switch (reg_off) {
 	case KVM_REG_RISCV_FP_D_REG(f[0]) ...
 	     KVM_REG_RISCV_FP_D_REG(f[31]):
@@ -307,96 +331,93 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
 		return "KVM_REG_RISCV_FP_D_REG(fcsr)";
 	}
 
-	TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id);
-	return NULL;
+	return strdup_printf("%lld /* UNKNOWN */", reg_off);
 }
 
-static const char *isa_ext_id_to_str(__u64 id)
+#define KVM_ISA_EXT_ARR(ext)		\
+[KVM_RISCV_ISA_EXT_##ext] = "KVM_RISCV_ISA_EXT_" #ext
+
+static const char *isa_ext_id_to_str(const char *prefix, __u64 id)
 {
 	/* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT);
+
 	static const char * const kvm_isa_ext_reg_name[] = {
-		"KVM_RISCV_ISA_EXT_A",
-		"KVM_RISCV_ISA_EXT_C",
-		"KVM_RISCV_ISA_EXT_D",
-		"KVM_RISCV_ISA_EXT_F",
-		"KVM_RISCV_ISA_EXT_H",
-		"KVM_RISCV_ISA_EXT_I",
-		"KVM_RISCV_ISA_EXT_M",
-		"KVM_RISCV_ISA_EXT_SVPBMT",
-		"KVM_RISCV_ISA_EXT_SSTC",
-		"KVM_RISCV_ISA_EXT_SVINVAL",
-		"KVM_RISCV_ISA_EXT_ZIHINTPAUSE",
-		"KVM_RISCV_ISA_EXT_ZICBOM",
-		"KVM_RISCV_ISA_EXT_ZICBOZ",
-		"KVM_RISCV_ISA_EXT_ZBB",
-		"KVM_RISCV_ISA_EXT_SSAIA",
-		"KVM_RISCV_ISA_EXT_V",
-		"KVM_RISCV_ISA_EXT_SVNAPOT",
-		"KVM_RISCV_ISA_EXT_ZBA",
-		"KVM_RISCV_ISA_EXT_ZBS",
-		"KVM_RISCV_ISA_EXT_ZICNTR",
-		"KVM_RISCV_ISA_EXT_ZICSR",
-		"KVM_RISCV_ISA_EXT_ZIFENCEI",
-		"KVM_RISCV_ISA_EXT_ZIHPM",
+		KVM_ISA_EXT_ARR(A),
+		KVM_ISA_EXT_ARR(C),
+		KVM_ISA_EXT_ARR(D),
+		KVM_ISA_EXT_ARR(F),
+		KVM_ISA_EXT_ARR(H),
+		KVM_ISA_EXT_ARR(I),
+		KVM_ISA_EXT_ARR(M),
+		KVM_ISA_EXT_ARR(V),
+		KVM_ISA_EXT_ARR(SMSTATEEN),
+		KVM_ISA_EXT_ARR(SSAIA),
+		KVM_ISA_EXT_ARR(SSTC),
+		KVM_ISA_EXT_ARR(SVINVAL),
+		KVM_ISA_EXT_ARR(SVNAPOT),
+		KVM_ISA_EXT_ARR(SVPBMT),
+		KVM_ISA_EXT_ARR(ZBA),
+		KVM_ISA_EXT_ARR(ZBB),
+		KVM_ISA_EXT_ARR(ZBS),
+		KVM_ISA_EXT_ARR(ZICBOM),
+		KVM_ISA_EXT_ARR(ZICBOZ),
+		KVM_ISA_EXT_ARR(ZICNTR),
+		KVM_ISA_EXT_ARR(ZICOND),
+		KVM_ISA_EXT_ARR(ZICSR),
+		KVM_ISA_EXT_ARR(ZIFENCEI),
+		KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+		KVM_ISA_EXT_ARR(ZIHPM),
 	};
 
-	if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) {
-		/*
-		 * isa_ext regs would grow regularly with new isa extension added, so
-		 * just show "reg" to indicate a new extension.
-		 */
+	if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
 		return strdup_printf("%lld /* UNKNOWN */", reg_off);
-	}
 
 	return kvm_isa_ext_reg_name[reg_off];
 }
 
+#define KVM_SBI_EXT_ARR(ext)		\
+[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext
+
 static const char *sbi_ext_single_id_to_str(__u64 reg_off)
 {
 	/* reg_off is KVM_RISCV_SBI_EXT_ID */
 	static const char * const kvm_sbi_ext_reg_name[] = {
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL",
-		"KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR",
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
 	};
 
-	if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) {
-		/*
-		 * sbi_ext regs would grow regularly with new sbi extension added, so
-		 * just show "reg" to indicate a new extension.
-		 */
+	if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
 		return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
-	}
 
 	return kvm_sbi_ext_reg_name[reg_off];
 }
 
 static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
 {
-	if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) {
-		/*
-		 * sbi_ext regs would grow regularly with new sbi extension added, so
-		 * just show "reg" to indicate a new extension.
-		 */
-		return strdup_printf("%lld /* UNKNOWN */", reg_off);
-	}
+	const char *unknown = "";
+
+	if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
+		unknown = " /* UNKNOWN */";
 
 	switch (reg_subtype) {
 	case KVM_REG_RISCV_SBI_MULTI_EN:
-		return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off);
+		return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown);
 	case KVM_REG_RISCV_SBI_MULTI_DIS:
-		return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off);
+		return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown);
 	}
 
-	return NULL;
+	return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
 }
 
 static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
@@ -404,6 +425,8 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
 	__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
 	__u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
 
+	assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT);
+
 	reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
 
 	switch (reg_subtype) {
@@ -414,8 +437,7 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
 		return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
 	}
 
-	TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype);
-	return NULL;
+	return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
 }
 
 void print_reg(const char *prefix, __u64 id)
@@ -436,14 +458,14 @@ void print_reg(const char *prefix, __u64 id)
 		reg_size = "KVM_REG_SIZE_U128";
 		break;
 	default:
-		TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
-			  prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+		printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,",
+		       (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & REG_MASK);
 	}
 
 	switch (id & KVM_REG_RISCV_TYPE_MASK) {
 	case KVM_REG_RISCV_CONFIG:
 		printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
-				reg_size, config_id_to_str(id));
+				reg_size, config_id_to_str(prefix, id));
 		break;
 	case KVM_REG_RISCV_CORE:
 		printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
@@ -467,15 +489,15 @@ void print_reg(const char *prefix, __u64 id)
 		break;
 	case KVM_REG_RISCV_ISA_EXT:
 		printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
-				reg_size, isa_ext_id_to_str(id));
+				reg_size, isa_ext_id_to_str(prefix, id));
 		break;
 	case KVM_REG_RISCV_SBI_EXT:
 		printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
 				reg_size, sbi_ext_id_to_str(prefix, id));
 		break;
 	default:
-		TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix,
-				(id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id);
+		printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,",
+				reg_size, id & REG_MASK);
 	}
 }
 
@@ -532,6 +554,7 @@ static __u64 base_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg),
 	KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
 	KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
 	KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
@@ -545,6 +568,7 @@ static __u64 base_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU,
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN,
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0,
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0,
 };
@@ -603,6 +627,10 @@ static __u64 zicntr_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
 };
 
+static __u64 zicond_regs[] = {
+	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND,
+};
+
 static __u64 zicsr_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
 };
@@ -626,6 +654,11 @@ static __u64 aia_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
 };
 
+static __u64 smstateen_regs[] = {
+	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0),
+	KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN,
+};
+
 static __u64 fp_f_regs[] = {
 	KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
 	KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
@@ -725,6 +758,8 @@ static __u64 fp_d_regs[] = {
 	{"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),}
 #define ZICNTR_REGS_SUBLIST \
 	{"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),}
+#define ZICOND_REGS_SUBLIST \
+	{"zicond", .feature = KVM_RISCV_ISA_EXT_ZICOND, .regs = zicond_regs, .regs_n = ARRAY_SIZE(zicond_regs),}
 #define ZICSR_REGS_SUBLIST \
 	{"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),}
 #define ZIFENCEI_REGS_SUBLIST \
@@ -733,6 +768,8 @@ static __u64 fp_d_regs[] = {
 	{"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),}
 #define AIA_REGS_SUBLIST \
 	{"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define SMSTATEEN_REGS_SUBLIST \
+	{"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),}
 #define FP_F_REGS_SUBLIST \
 	{"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
 		.regs_n = ARRAY_SIZE(fp_f_regs),}
@@ -828,6 +865,14 @@ static struct vcpu_reg_list zicntr_config = {
 	},
 };
 
+static struct vcpu_reg_list zicond_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	ZICOND_REGS_SUBLIST,
+	{0},
+	},
+};
+
 static struct vcpu_reg_list zicsr_config = {
 	.sublists = {
 	BASE_SUBLIST,
@@ -860,6 +905,14 @@ static struct vcpu_reg_list aia_config = {
 	},
 };
 
+static struct vcpu_reg_list smstateen_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	SMSTATEEN_REGS_SUBLIST,
+	{0},
+	},
+};
+
 static struct vcpu_reg_list fp_f_config = {
 	.sublists = {
 	BASE_SUBLIST,
@@ -888,10 +941,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
 	&zbb_config,
 	&zbs_config,
 	&zicntr_config,
+	&zicond_config,
 	&zicsr_config,
 	&zifencei_config,
 	&zihpm_config,
 	&aia_config,
+	&smstateen_config,
 	&fp_f_config,
 	&fp_d_config,
 };
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
new file mode 100644
index 000000000000..df351ae17029
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+	const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+	const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+	const uint64_t legal = ignored | valid;
+	uint64_t val = BIT_ULL(bit);
+	uint64_t actual;
+	int r;
+
+	r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+	TEST_ASSERT(val & ~legal ? !r : r == 1,
+		    "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+		    val, val & ~legal ? "fail" : "succeed");
+
+	actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+	TEST_ASSERT(actual == (val & valid),
+		    "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+		    bit, actual, (val & valid));
+
+	vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+	unsigned int bit;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	for (bit = 0; bit < BITS_PER_LONG; bit++)
+		test_hwcr_bit(vcpu, bit);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index e446d76d1c0c..6c1278562090 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * KVM_GET/SET_* tests
- *
  * Copyright (C) 2022, Red Hat, Inc.
  *
  * Tests for Hyper-V extensions to SVM.
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 7f36c32fa760..18ac5c1952a3 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * tools/testing/selftests/kvm/nx_huge_page_test.c
- *
  * Usage: to be run via nx_huge_page_test.sh, which does the necessary
  * environment setup and teardown
  *
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
index 0560149e66ed..7cbb409801ee 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -4,7 +4,6 @@
 # Wrapper script which performs setup and cleanup for nx_huge_pages_test.
 # Makes use of root privileges to set up huge pages and KVM module parameters.
 #
-# tools/testing/selftests/kvm/nx_huge_page_test.sh
 # Copyright (C) 2022, Google LLC.
 
 set -e
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 4c4925a8ab45..88b58aab7207 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
 static void __attribute__((__flatten__)) guest_code(void *arg)
 {
 	GUEST_SYNC(1);
+
+	if (this_cpu_has(X86_FEATURE_XSAVE)) {
+		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+		uint8_t buffer[4096];
+
+		memset(buffer, 0xcc, sizeof(buffer));
+
+		set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+		GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+		xsetbv(0, xgetbv(0) | supported_xcr0);
+
+		/*
+		 * Modify state for all supported xfeatures to take them out of
+		 * their "init" state, i.e. to make them show up in XSTATE_BV.
+		 *
+		 * Note off-by-default features, e.g. AMX, are out of scope for
+		 * this particular testcase as they have a different ABI.
+		 */
+		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+		asm volatile ("fincstp");
+
+		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+		if (supported_xcr0 & XFEATURE_MASK_YMM)
+			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+		}
+
+		if (this_cpu_has(X86_FEATURE_MPX)) {
+			uint64_t bounds[2] = { 10, 0xffffffffull };
+			uint64_t output[2] = { };
+
+			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+			/*
+			 * Don't bother trying to get BNDCSR into the INUSE
+			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
+			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
+			 * is simpler than doing XRSTOR here in the guest.
+			 *
+			 * However, temporarily enable MPX in BNDCFGS so that
+			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
+			 * enabled, all MPX instructions are treated as NOPs.
+			 *
+			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+			 * mnemonics/registers has been removed from gcc and
+			 * clang (and was never fully supported by clang).
+			 */
+			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+			/*
+			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+			 * that BND1 actually got loaded.
+			 */
+			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+			wrmsr(MSR_IA32_BNDCFGS, 0);
+
+			GUEST_ASSERT_EQ(bounds[0], output[0]);
+			GUEST_ASSERT_EQ(bounds[1], output[1]);
+		}
+		if (this_cpu_has(X86_FEATURE_PKU)) {
+			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+			set_cr4(get_cr4() | X86_CR4_PKE);
+			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+			wrpkru(-1u);
+		}
+	}
+
 	GUEST_SYNC(2);
 
 	if (arg) {
@@ -153,10 +230,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
 
 int main(int argc, char *argv[])
 {
+	uint64_t *xstate_bv, saved_xstate_bv;
 	vm_vaddr_t nested_gva = 0;
-
+	struct kvm_cpuid2 empty_cpuid = {};
 	struct kvm_regs regs1, regs2;
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu, *vcpuN;
 	struct kvm_vm *vm;
 	struct kvm_x86_state *state;
 	struct ucall uc;
@@ -209,6 +287,34 @@ int main(int argc, char *argv[])
 		/* Restore state in a new VM.  */
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
+
+		/*
+		 * Restore XSAVE state in a dummy vCPU, first without doing
+		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
+		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
+		 * load only XSAVE state, MSRs in particular have a much more
+		 * convoluted ABI.
+		 *
+		 * Load two versions of XSAVE state: one with the actual guest
+		 * XSAVE state, and one with all supported features forced "on"
+		 * in xstate_bv, e.g. to ensure that KVM allows loading all
+		 * supported features, even if something goes awry in saving
+		 * the original snapshot.
+		 */
+		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+		saved_xstate_bv = *xstate_bv;
+
+		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+		vcpu_xsave_set(vcpuN, state->xsave);
+		*xstate_bv = kvm_cpu_supported_xcr0();
+		vcpu_xsave_set(vcpuN, state->xsave);
+
+		vcpu_init_cpuid(vcpuN, &empty_cpuid);
+		vcpu_xsave_set(vcpuN, state->xsave);
+		*xstate_bv = saved_xstate_bv;
+		vcpu_xsave_set(vcpuN, state->xsave);
+
 		kvm_x86_state_cleanup(state);
 
 		memset(&regs2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
index 5b669818e39a..59c7304f805e 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -1,10 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * svm_vmcall_test
- *
  * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
  */
 
 #include "test_util.h"
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 05898ad9f4d9..9ec9ab60b63e 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -1,10 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * svm_vmcall_test
- *
  * Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
  */
 
 #include "test_util.h"
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 792c3f0a59b4..646f778dfb1e 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -75,7 +75,7 @@ TEST(abi_version)
 	const struct landlock_ruleset_attr ruleset_attr = {
 		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
 	};
-	ASSERT_EQ(3, landlock_create_ruleset(NULL, 0,
+	ASSERT_EQ(4, landlock_create_ruleset(NULL, 0,
 					     LANDLOCK_CREATE_RULESET_VERSION));
 
 	ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index d7987ae8d7fc..5b79758cae62 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -112,10 +112,13 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
 	cap_t cap_p;
 	/* Only these three capabilities are useful for the tests. */
 	const cap_value_t caps[] = {
+		/* clang-format off */
 		CAP_DAC_OVERRIDE,
 		CAP_MKNOD,
 		CAP_SYS_ADMIN,
 		CAP_SYS_CHROOT,
+		CAP_NET_BIND_SERVICE,
+		/* clang-format on */
 	};
 
 	cap_p = cap_get_proc();
@@ -256,3 +259,13 @@ static int __maybe_unused send_fd(int usock, int fd_tx)
 		return -errno;
 	return 0;
 }
+
+static void __maybe_unused
+enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
+{
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+	{
+		TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+	}
+}
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 3dc9e438eab1..0086efaa7b68 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,5 +1,9 @@
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_SCHED=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NET=y
+CONFIG_NET_NS=y
 CONFIG_OVERLAY_FS=y
 CONFIG_PROC_FS=y
 CONFIG_SECURITY=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 251594306d40..18e1f86a6234 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -677,16 +677,6 @@ static int create_ruleset(struct __test_metadata *const _metadata,
 	return ruleset_fd;
 }
 
-static void enforce_ruleset(struct __test_metadata *const _metadata,
-			    const int ruleset_fd)
-{
-	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
-	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
-	{
-		TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
-	}
-}
-
 TEST_F_FORK(layout0, proc_nsfs)
 {
 	const struct rule rules[] = {
@@ -1635,6 +1625,65 @@ TEST_F_FORK(layout1, move_mount)
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 }
 
+TEST_F_FORK(layout1, topology_changes_with_net_only)
+{
+	const struct landlock_ruleset_attr ruleset_net = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	int ruleset_fd;
+
+	/* Add network restrictions. */
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_net, sizeof(ruleset_net), 0);
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Mount, remount, move_mount, umount, and pivot_root checks. */
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s1d2));
+	ASSERT_EQ(0, mount(NULL, dir_s1d2, NULL, MS_PRIVATE | MS_REC, NULL));
+	ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+			     dir_s2d2, 0));
+	ASSERT_EQ(0, umount(dir_s2d2));
+	ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+	ASSERT_EQ(0, chdir("/"));
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_and_fs)
+{
+	const struct landlock_ruleset_attr ruleset_net_fs = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		.handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+	};
+	int ruleset_fd;
+
+	/* Add network and filesystem restrictions. */
+	ruleset_fd = landlock_create_ruleset(&ruleset_net_fs,
+					     sizeof(ruleset_net_fs), 0);
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	/* Mount, remount, move_mount, umount, and pivot_root checks. */
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	ASSERT_EQ(-1, mount_opt(&mnt_tmp, dir_s1d2));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_PRIVATE | MS_REC, NULL));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+			      dir_s2d2, 0));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, umount(dir_s3d2));
+	ASSERT_EQ(EPERM, errno);
+	ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+	ASSERT_EQ(EPERM, errno);
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
 TEST_F_FORK(layout1, release_inodes)
 {
 	const struct rule rules[] = {
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
new file mode 100644
index 000000000000..929e21c4db05
--- /dev/null
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -0,0 +1,1738 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock tests - Network
+ *
+ * Copyright © 2022-2023 Huawei Tech. Co., Ltd.
+ * Copyright © 2023 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/in.h>
+#include <sched.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "common.h"
+
+const short sock_port_start = (1 << 10);
+
+static const char loopback_ipv4[] = "127.0.0.1";
+static const char loopback_ipv6[] = "::1";
+
+/* Number pending connections queue to be hold. */
+const short backlog = 10;
+
+enum sandbox_type {
+	NO_SANDBOX,
+	/* This may be used to test rules that allow *and* deny accesses. */
+	TCP_SANDBOX,
+};
+
+struct protocol_variant {
+	int domain;
+	int type;
+};
+
+struct service_fixture {
+	struct protocol_variant protocol;
+	/* port is also stored in ipv4_addr.sin_port or ipv6_addr.sin6_port */
+	unsigned short port;
+	union {
+		struct sockaddr_in ipv4_addr;
+		struct sockaddr_in6 ipv6_addr;
+		struct {
+			struct sockaddr_un unix_addr;
+			socklen_t unix_addr_len;
+		};
+	};
+};
+
+static int set_service(struct service_fixture *const srv,
+		       const struct protocol_variant prot,
+		       const unsigned short index)
+{
+	memset(srv, 0, sizeof(*srv));
+
+	/*
+	 * Copies all protocol properties in case of the variant only contains
+	 * a subset of them.
+	 */
+	srv->protocol = prot;
+
+	/* Checks for port overflow. */
+	if (index > 2)
+		return 1;
+	srv->port = sock_port_start << (2 * index);
+
+	switch (prot.domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		srv->ipv4_addr.sin_family = prot.domain;
+		srv->ipv4_addr.sin_port = htons(srv->port);
+		srv->ipv4_addr.sin_addr.s_addr = inet_addr(loopback_ipv4);
+		return 0;
+
+	case AF_INET6:
+		srv->ipv6_addr.sin6_family = prot.domain;
+		srv->ipv6_addr.sin6_port = htons(srv->port);
+		inet_pton(AF_INET6, loopback_ipv6, &srv->ipv6_addr.sin6_addr);
+		return 0;
+
+	case AF_UNIX:
+		srv->unix_addr.sun_family = prot.domain;
+		sprintf(srv->unix_addr.sun_path,
+			"_selftests-landlock-net-tid%d-index%d", gettid(),
+			index);
+		srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
+		srv->unix_addr.sun_path[0] = '\0';
+		return 0;
+	}
+	return 1;
+}
+
+static void setup_loopback(struct __test_metadata *const _metadata)
+{
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	ASSERT_EQ(0, unshare(CLONE_NEWNET));
+	ASSERT_EQ(0, system("ip link set dev lo up"));
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+static bool is_restricted(const struct protocol_variant *const prot,
+			  const enum sandbox_type sandbox)
+{
+	switch (prot->domain) {
+	case AF_INET:
+	case AF_INET6:
+		switch (prot->type) {
+		case SOCK_STREAM:
+			return sandbox == TCP_SANDBOX;
+		}
+		break;
+	}
+	return false;
+}
+
+static int socket_variant(const struct service_fixture *const srv)
+{
+	int ret;
+
+	ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+		     0);
+	if (ret < 0)
+		return -errno;
+	return ret;
+}
+
+#ifndef SIN6_LEN_RFC2133
+#define SIN6_LEN_RFC2133 24
+#endif
+
+static socklen_t get_addrlen(const struct service_fixture *const srv,
+			     const bool minimal)
+{
+	switch (srv->protocol.domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		return sizeof(srv->ipv4_addr);
+
+	case AF_INET6:
+		if (minimal)
+			return SIN6_LEN_RFC2133;
+		return sizeof(srv->ipv6_addr);
+
+	case AF_UNIX:
+		if (minimal)
+			return sizeof(srv->unix_addr) -
+			       sizeof(srv->unix_addr.sun_path);
+		return srv->unix_addr_len;
+
+	default:
+		return 0;
+	}
+}
+
+static void set_port(struct service_fixture *const srv, uint16_t port)
+{
+	switch (srv->protocol.domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		srv->ipv4_addr.sin_port = htons(port);
+		return;
+
+	case AF_INET6:
+		srv->ipv6_addr.sin6_port = htons(port);
+		return;
+
+	default:
+		return;
+	}
+}
+
+static uint16_t get_binded_port(int socket_fd,
+				const struct protocol_variant *const prot)
+{
+	struct sockaddr_in ipv4_addr;
+	struct sockaddr_in6 ipv6_addr;
+	socklen_t ipv4_addr_len, ipv6_addr_len;
+
+	/* Gets binded port. */
+	switch (prot->domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		ipv4_addr_len = sizeof(ipv4_addr);
+		getsockname(socket_fd, &ipv4_addr, &ipv4_addr_len);
+		return ntohs(ipv4_addr.sin_port);
+
+	case AF_INET6:
+		ipv6_addr_len = sizeof(ipv6_addr);
+		getsockname(socket_fd, &ipv6_addr, &ipv6_addr_len);
+		return ntohs(ipv6_addr.sin6_port);
+
+	default:
+		return 0;
+	}
+}
+
+static int bind_variant_addrlen(const int sock_fd,
+				const struct service_fixture *const srv,
+				const socklen_t addrlen)
+{
+	int ret;
+
+	switch (srv->protocol.domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		ret = bind(sock_fd, &srv->ipv4_addr, addrlen);
+		break;
+
+	case AF_INET6:
+		ret = bind(sock_fd, &srv->ipv6_addr, addrlen);
+		break;
+
+	case AF_UNIX:
+		ret = bind(sock_fd, &srv->unix_addr, addrlen);
+		break;
+
+	default:
+		errno = EAFNOSUPPORT;
+		return -errno;
+	}
+
+	if (ret < 0)
+		return -errno;
+	return ret;
+}
+
+static int bind_variant(const int sock_fd,
+			const struct service_fixture *const srv)
+{
+	return bind_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+static int connect_variant_addrlen(const int sock_fd,
+				   const struct service_fixture *const srv,
+				   const socklen_t addrlen)
+{
+	int ret;
+
+	switch (srv->protocol.domain) {
+	case AF_UNSPEC:
+	case AF_INET:
+		ret = connect(sock_fd, &srv->ipv4_addr, addrlen);
+		break;
+
+	case AF_INET6:
+		ret = connect(sock_fd, &srv->ipv6_addr, addrlen);
+		break;
+
+	case AF_UNIX:
+		ret = connect(sock_fd, &srv->unix_addr, addrlen);
+		break;
+
+	default:
+		errno = -EAFNOSUPPORT;
+		return -errno;
+	}
+
+	if (ret < 0)
+		return -errno;
+	return ret;
+}
+
+static int connect_variant(const int sock_fd,
+			   const struct service_fixture *const srv)
+{
+	return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+FIXTURE(protocol)
+{
+	struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+};
+
+FIXTURE_VARIANT(protocol)
+{
+	const enum sandbox_type sandbox;
+	const struct protocol_variant prot;
+};
+
+FIXTURE_SETUP(protocol)
+{
+	const struct protocol_variant prot_unspec = {
+		.domain = AF_UNSPEC,
+		.type = SOCK_STREAM,
+	};
+
+	disable_caps(_metadata);
+
+	ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+	ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+	ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
+
+	ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+
+	ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
+	self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(protocol)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_DGRAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_DGRAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_UNIX,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_UNIX,
+		.type = SOCK_DGRAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_DGRAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_DGRAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_UNIX,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_UNIX,
+		.type = SOCK_DGRAM,
+	},
+};
+
+static void test_bind_and_connect(struct __test_metadata *const _metadata,
+				  const struct service_fixture *const srv,
+				  const bool deny_bind, const bool deny_connect)
+{
+	char buf = '\0';
+	int inval_fd, bind_fd, client_fd, status, ret;
+	pid_t child;
+
+	/* Starts invalid addrlen tests with bind. */
+	inval_fd = socket_variant(srv);
+	ASSERT_LE(0, inval_fd)
+	{
+		TH_LOG("Failed to create socket: %s", strerror(errno));
+	}
+
+	/* Tries to bind with zero as addrlen. */
+	EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, 0));
+
+	/* Tries to bind with too small addrlen. */
+	EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv,
+						get_addrlen(srv, true) - 1));
+
+	/* Tries to bind with minimal addrlen. */
+	ret = bind_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+	if (deny_bind) {
+		EXPECT_EQ(-EACCES, ret);
+	} else {
+		EXPECT_EQ(0, ret)
+		{
+			TH_LOG("Failed to bind to socket: %s", strerror(errno));
+		}
+	}
+	EXPECT_EQ(0, close(inval_fd));
+
+	/* Starts invalid addrlen tests with connect. */
+	inval_fd = socket_variant(srv);
+	ASSERT_LE(0, inval_fd);
+
+	/* Tries to connect with zero as addrlen. */
+	EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, 0));
+
+	/* Tries to connect with too small addrlen. */
+	EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv,
+						   get_addrlen(srv, true) - 1));
+
+	/* Tries to connect with minimal addrlen. */
+	ret = connect_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+	if (srv->protocol.domain == AF_UNIX) {
+		EXPECT_EQ(-EINVAL, ret);
+	} else if (deny_connect) {
+		EXPECT_EQ(-EACCES, ret);
+	} else if (srv->protocol.type == SOCK_STREAM) {
+		/* No listening server, whatever the value of deny_bind. */
+		EXPECT_EQ(-ECONNREFUSED, ret);
+	} else {
+		EXPECT_EQ(0, ret)
+		{
+			TH_LOG("Failed to connect to socket: %s",
+			       strerror(errno));
+		}
+	}
+	EXPECT_EQ(0, close(inval_fd));
+
+	/* Starts connection tests. */
+	bind_fd = socket_variant(srv);
+	ASSERT_LE(0, bind_fd);
+
+	ret = bind_variant(bind_fd, srv);
+	if (deny_bind) {
+		EXPECT_EQ(-EACCES, ret);
+	} else {
+		EXPECT_EQ(0, ret);
+
+		/* Creates a listening socket. */
+		if (srv->protocol.type == SOCK_STREAM)
+			EXPECT_EQ(0, listen(bind_fd, backlog));
+	}
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		int connect_fd, ret;
+
+		/* Closes listening socket for the child. */
+		EXPECT_EQ(0, close(bind_fd));
+
+		/* Starts connection tests. */
+		connect_fd = socket_variant(srv);
+		ASSERT_LE(0, connect_fd);
+		ret = connect_variant(connect_fd, srv);
+		if (deny_connect) {
+			EXPECT_EQ(-EACCES, ret);
+		} else if (deny_bind) {
+			/* No listening server. */
+			EXPECT_EQ(-ECONNREFUSED, ret);
+		} else {
+			EXPECT_EQ(0, ret);
+			EXPECT_EQ(1, write(connect_fd, ".", 1));
+		}
+
+		EXPECT_EQ(0, close(connect_fd));
+		_exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+		return;
+	}
+
+	/* Accepts connection from the child. */
+	client_fd = bind_fd;
+	if (!deny_bind && !deny_connect) {
+		if (srv->protocol.type == SOCK_STREAM) {
+			client_fd = accept(bind_fd, NULL, 0);
+			ASSERT_LE(0, client_fd);
+		}
+
+		EXPECT_EQ(1, read(client_fd, &buf, 1));
+		EXPECT_EQ('.', buf);
+	}
+
+	EXPECT_EQ(child, waitpid(child, &status, 0));
+	EXPECT_EQ(1, WIFEXITED(status));
+	EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+	/* Closes connection, if any. */
+	if (client_fd != bind_fd)
+		EXPECT_LE(0, close(client_fd));
+
+	/* Closes listening socket. */
+	EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, bind)
+{
+	if (variant->sandbox == TCP_SANDBOX) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		};
+		const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = self->srv0.port,
+		};
+		const struct landlock_net_port_attr tcp_connect_p1 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = self->srv1.port,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Allows connect and bind for the first port.  */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_p0, 0));
+
+		/* Allows connect and denies bind for the second port. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_connect_p1, 0));
+
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	/* Binds a socket to the first port. */
+	test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+	/* Binds a socket to the second port. */
+	test_bind_and_connect(_metadata, &self->srv1,
+			      is_restricted(&variant->prot, variant->sandbox),
+			      false);
+
+	/* Binds a socket to the third port. */
+	test_bind_and_connect(_metadata, &self->srv2,
+			      is_restricted(&variant->prot, variant->sandbox),
+			      is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, connect)
+{
+	if (variant->sandbox == TCP_SANDBOX) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		};
+		const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = self->srv0.port,
+		};
+		const struct landlock_net_port_attr tcp_bind_p1 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+			.port = self->srv1.port,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Allows connect and bind for the first port. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_p0, 0));
+
+		/* Allows bind and denies connect for the second port. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_p1, 0));
+
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+	test_bind_and_connect(_metadata, &self->srv1, false,
+			      is_restricted(&variant->prot, variant->sandbox));
+
+	test_bind_and_connect(_metadata, &self->srv2,
+			      is_restricted(&variant->prot, variant->sandbox),
+			      is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, bind_unspec)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+	};
+	const struct landlock_net_port_attr tcp_bind = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = self->srv0.port,
+	};
+	int bind_fd, ret;
+
+	if (variant->sandbox == TCP_SANDBOX) {
+		const int ruleset_fd = landlock_create_ruleset(
+			&ruleset_attr, sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Allows bind. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+
+	/* Allowed bind on AF_UNSPEC/INADDR_ANY. */
+	ret = bind_variant(bind_fd, &self->unspec_any0);
+	if (variant->prot.domain == AF_INET) {
+		EXPECT_EQ(0, ret)
+		{
+			TH_LOG("Failed to bind to unspec/any socket: %s",
+			       strerror(errno));
+		}
+	} else {
+		EXPECT_EQ(-EINVAL, ret);
+	}
+	EXPECT_EQ(0, close(bind_fd));
+
+	if (variant->sandbox == TCP_SANDBOX) {
+		const int ruleset_fd = landlock_create_ruleset(
+			&ruleset_attr, sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Denies bind. */
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+
+	/* Denied bind on AF_UNSPEC/INADDR_ANY. */
+	ret = bind_variant(bind_fd, &self->unspec_any0);
+	if (variant->prot.domain == AF_INET) {
+		if (is_restricted(&variant->prot, variant->sandbox)) {
+			EXPECT_EQ(-EACCES, ret);
+		} else {
+			EXPECT_EQ(0, ret);
+		}
+	} else {
+		EXPECT_EQ(-EINVAL, ret);
+	}
+	EXPECT_EQ(0, close(bind_fd));
+
+	/* Checks bind with AF_UNSPEC and the loopback address. */
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+	ret = bind_variant(bind_fd, &self->unspec_srv0);
+	if (variant->prot.domain == AF_INET) {
+		EXPECT_EQ(-EAFNOSUPPORT, ret);
+	} else {
+		EXPECT_EQ(-EINVAL, ret)
+		{
+			TH_LOG("Wrong bind error: %s", strerror(errno));
+		}
+	}
+	EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, connect_unspec)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	const struct landlock_net_port_attr tcp_connect = {
+		.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		.port = self->srv0.port,
+	};
+	int bind_fd, client_fd, status;
+	pid_t child;
+
+	/* Specific connection tests. */
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+	EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+	if (self->srv0.protocol.type == SOCK_STREAM)
+		EXPECT_EQ(0, listen(bind_fd, backlog));
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		int connect_fd, ret;
+
+		/* Closes listening socket for the child. */
+		EXPECT_EQ(0, close(bind_fd));
+
+		connect_fd = socket_variant(&self->srv0);
+		ASSERT_LE(0, connect_fd);
+		EXPECT_EQ(0, connect_variant(connect_fd, &self->srv0));
+
+		/* Tries to connect again, or set peer. */
+		ret = connect_variant(connect_fd, &self->srv0);
+		if (self->srv0.protocol.type == SOCK_STREAM) {
+			EXPECT_EQ(-EISCONN, ret);
+		} else {
+			EXPECT_EQ(0, ret);
+		}
+
+		if (variant->sandbox == TCP_SANDBOX) {
+			const int ruleset_fd = landlock_create_ruleset(
+				&ruleset_attr, sizeof(ruleset_attr), 0);
+			ASSERT_LE(0, ruleset_fd);
+
+			/* Allows connect. */
+			ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+						       LANDLOCK_RULE_NET_PORT,
+						       &tcp_connect, 0));
+			enforce_ruleset(_metadata, ruleset_fd);
+			EXPECT_EQ(0, close(ruleset_fd));
+		}
+
+		/* Disconnects already connected socket, or set peer. */
+		ret = connect_variant(connect_fd, &self->unspec_any0);
+		if (self->srv0.protocol.domain == AF_UNIX &&
+		    self->srv0.protocol.type == SOCK_STREAM) {
+			EXPECT_EQ(-EINVAL, ret);
+		} else {
+			EXPECT_EQ(0, ret);
+		}
+
+		/* Tries to reconnect, or set peer. */
+		ret = connect_variant(connect_fd, &self->srv0);
+		if (self->srv0.protocol.domain == AF_UNIX &&
+		    self->srv0.protocol.type == SOCK_STREAM) {
+			EXPECT_EQ(-EISCONN, ret);
+		} else {
+			EXPECT_EQ(0, ret);
+		}
+
+		if (variant->sandbox == TCP_SANDBOX) {
+			const int ruleset_fd = landlock_create_ruleset(
+				&ruleset_attr, sizeof(ruleset_attr), 0);
+			ASSERT_LE(0, ruleset_fd);
+
+			/* Denies connect. */
+			enforce_ruleset(_metadata, ruleset_fd);
+			EXPECT_EQ(0, close(ruleset_fd));
+		}
+
+		ret = connect_variant(connect_fd, &self->unspec_any0);
+		if (self->srv0.protocol.domain == AF_UNIX &&
+		    self->srv0.protocol.type == SOCK_STREAM) {
+			EXPECT_EQ(-EINVAL, ret);
+		} else {
+			/* Always allowed to disconnect. */
+			EXPECT_EQ(0, ret);
+		}
+
+		EXPECT_EQ(0, close(connect_fd));
+		_exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+		return;
+	}
+
+	client_fd = bind_fd;
+	if (self->srv0.protocol.type == SOCK_STREAM) {
+		client_fd = accept(bind_fd, NULL, 0);
+		ASSERT_LE(0, client_fd);
+	}
+
+	EXPECT_EQ(child, waitpid(child, &status, 0));
+	EXPECT_EQ(1, WIFEXITED(status));
+	EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+	/* Closes connection, if any. */
+	if (client_fd != bind_fd)
+		EXPECT_LE(0, close(client_fd));
+
+	/* Closes listening socket. */
+	EXPECT_EQ(0, close(bind_fd));
+}
+
+FIXTURE(ipv4)
+{
+	struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(ipv4)
+{
+	const enum sandbox_type sandbox;
+	const int type;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_tcp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.type = SOCK_DGRAM,
+};
+
+FIXTURE_SETUP(ipv4)
+{
+	const struct protocol_variant prot = {
+		.domain = AF_INET,
+		.type = variant->type,
+	};
+
+	disable_caps(_metadata);
+
+	set_service(&self->srv0, prot, 0);
+	set_service(&self->srv1, prot, 1);
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4)
+{
+}
+
+TEST_F(ipv4, from_unix_to_inet)
+{
+	int unix_stream_fd, unix_dgram_fd;
+
+	if (variant->sandbox == TCP_SANDBOX) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		};
+		const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = self->srv0.port,
+		};
+		int ruleset_fd;
+
+		/* Denies connect and bind to check errno value. */
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Allows connect and bind for srv0.  */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_p0, 0));
+
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	unix_stream_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	ASSERT_LE(0, unix_stream_fd);
+
+	unix_dgram_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+	ASSERT_LE(0, unix_dgram_fd);
+
+	/* Checks unix stream bind and connect for srv0. */
+	EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv0));
+	EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv0));
+
+	/* Checks unix stream bind and connect for srv1. */
+	EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv1))
+	{
+		TH_LOG("Wrong bind error: %s", strerror(errno));
+	}
+	EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv1));
+
+	/* Checks unix datagram bind and connect for srv0. */
+	EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv0));
+	EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv0));
+
+	/* Checks unix datagram bind and connect for srv1. */
+	EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv1));
+	EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv1));
+}
+
+FIXTURE(tcp_layers)
+{
+	struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(tcp_layers)
+{
+	const size_t num_layers;
+	const int domain;
+};
+
+FIXTURE_SETUP(tcp_layers)
+{
+	const struct protocol_variant prot = {
+		.domain = variant->domain,
+		.type = SOCK_STREAM,
+	};
+
+	disable_caps(_metadata);
+
+	ASSERT_EQ(0, set_service(&self->srv0, prot, 0));
+	ASSERT_EQ(0, set_service(&self->srv1, prot, 1));
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(tcp_layers)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv4) {
+	/* clang-format on */
+	.domain = AF_INET,
+	.num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv4) {
+	/* clang-format on */
+	.domain = AF_INET,
+	.num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv4) {
+	/* clang-format on */
+	.domain = AF_INET,
+	.num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv4) {
+	/* clang-format on */
+	.domain = AF_INET,
+	.num_layers = 3,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv6) {
+	/* clang-format on */
+	.domain = AF_INET6,
+	.num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv6) {
+	/* clang-format on */
+	.domain = AF_INET6,
+	.num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv6) {
+	/* clang-format on */
+	.domain = AF_INET6,
+	.num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv6) {
+	/* clang-format on */
+	.domain = AF_INET6,
+	.num_layers = 3,
+};
+
+TEST_F(tcp_layers, ruleset_overlap)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	const struct landlock_net_port_attr tcp_bind = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = self->srv0.port,
+	};
+	const struct landlock_net_port_attr tcp_bind_connect = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+				  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		.port = self->srv0.port,
+	};
+
+	if (variant->num_layers >= 1) {
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Allows bind. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind, 0));
+		/* Also allows bind, but allows connect too. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	if (variant->num_layers >= 2) {
+		int ruleset_fd;
+
+		/* Creates another ruleset layer. */
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Only allows bind. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	if (variant->num_layers >= 3) {
+		int ruleset_fd;
+
+		/* Creates another ruleset layer. */
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Try to allow bind and connect. */
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	/*
+	 * Forbids to connect to the socket because only one ruleset layer
+	 * allows connect.
+	 */
+	test_bind_and_connect(_metadata, &self->srv0, false,
+			      variant->num_layers >= 2);
+}
+
+TEST_F(tcp_layers, ruleset_expand)
+{
+	if (variant->num_layers >= 1) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+		};
+		/* Allows bind for srv0. */
+		const struct landlock_net_port_attr bind_srv0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+			.port = self->srv0.port,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &bind_srv0, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	if (variant->num_layers >= 2) {
+		/* Expands network mask with connect action. */
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		};
+		/* Allows bind for srv0 and connect to srv0. */
+		const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = self->srv0.port,
+		};
+		/* Try to allow bind for srv1. */
+		const struct landlock_net_port_attr tcp_bind_p1 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+			.port = self->srv1.port,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_p0, 0));
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_p1, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	if (variant->num_layers >= 3) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		};
+		/* Allows connect to srv0, without bind rule. */
+		const struct landlock_net_port_attr tcp_bind_p0 = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+			.port = self->srv0.port,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_p0, 0));
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	test_bind_and_connect(_metadata, &self->srv0, false,
+			      variant->num_layers >= 3);
+
+	test_bind_and_connect(_metadata, &self->srv1, variant->num_layers >= 1,
+			      variant->num_layers >= 2);
+}
+
+/* clang-format off */
+FIXTURE(mini) {};
+/* clang-format on */
+
+FIXTURE_SETUP(mini)
+{
+	disable_caps(_metadata);
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(mini)
+{
+}
+
+/* clang-format off */
+
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+
+#define ACCESS_ALL ( \
+	LANDLOCK_ACCESS_NET_BIND_TCP | \
+	LANDLOCK_ACCESS_NET_CONNECT_TCP)
+
+/* clang-format on */
+
+TEST_F(mini, network_access_rights)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = ACCESS_ALL,
+	};
+	struct landlock_net_port_attr net_port = {
+		.port = sock_port_start,
+	};
+	int ruleset_fd;
+	__u64 access;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+		net_port.allowed_access = access;
+		EXPECT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &net_port, 0))
+		{
+			TH_LOG("Failed to add rule with access 0x%llx: %s",
+			       access, strerror(errno));
+		}
+	}
+	EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/* Checks invalid attribute, out of landlock network access range. */
+TEST_F(mini, unknown_access_rights)
+{
+	__u64 access_mask;
+
+	for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+	     access_mask >>= 1) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = access_mask,
+		};
+
+		EXPECT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+						      sizeof(ruleset_attr), 0));
+		EXPECT_EQ(EINVAL, errno);
+	}
+}
+
+TEST_F(mini, inval)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP
+	};
+	const struct landlock_net_port_attr tcp_bind_connect = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+				  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		.port = sock_port_start,
+	};
+	const struct landlock_net_port_attr tcp_denied = {
+		.allowed_access = 0,
+		.port = sock_port_start,
+	};
+	const struct landlock_net_port_attr tcp_bind = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = sock_port_start,
+	};
+	int ruleset_fd;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	/* Checks unhandled allowed_access. */
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&tcp_bind_connect, 0));
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Checks zero access value. */
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&tcp_denied, 0));
+	EXPECT_EQ(ENOMSG, errno);
+
+	/* Adds with legitimate values. */
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &tcp_bind, 0));
+}
+
+TEST_F(mini, tcp_port_overflow)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	const struct landlock_net_port_attr port_max_bind = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = UINT16_MAX,
+	};
+	const struct landlock_net_port_attr port_max_connect = {
+		.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		.port = UINT16_MAX,
+	};
+	const struct landlock_net_port_attr port_overflow1 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = UINT16_MAX + 1,
+	};
+	const struct landlock_net_port_attr port_overflow2 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = UINT16_MAX + 2,
+	};
+	const struct landlock_net_port_attr port_overflow3 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = UINT32_MAX + 1UL,
+	};
+	const struct landlock_net_port_attr port_overflow4 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = UINT32_MAX + 2UL,
+	};
+	const struct protocol_variant ipv4_tcp = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	};
+	struct service_fixture srv_denied, srv_max_allowed;
+	int ruleset_fd;
+
+	ASSERT_EQ(0, set_service(&srv_denied, ipv4_tcp, 0));
+
+	/* Be careful to avoid port inconsistencies. */
+	srv_max_allowed = srv_denied;
+	srv_max_allowed.port = port_max_bind.port;
+	srv_max_allowed.ipv4_addr.sin_port = htons(port_max_bind.port);
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &port_max_bind, 0));
+
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&port_overflow1, 0));
+	EXPECT_EQ(EINVAL, errno);
+
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&port_overflow2, 0));
+	EXPECT_EQ(EINVAL, errno);
+
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&port_overflow3, 0));
+	EXPECT_EQ(EINVAL, errno);
+
+	/* Interleaves with invalid rule additions. */
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &port_max_connect, 0));
+
+	EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					&port_overflow4, 0));
+	EXPECT_EQ(EINVAL, errno);
+
+	enforce_ruleset(_metadata, ruleset_fd);
+
+	test_bind_and_connect(_metadata, &srv_denied, true, true);
+	test_bind_and_connect(_metadata, &srv_max_allowed, false, false);
+}
+
+FIXTURE(ipv4_tcp)
+{
+	struct service_fixture srv0, srv1;
+};
+
+FIXTURE_SETUP(ipv4_tcp)
+{
+	const struct protocol_variant ipv4_tcp = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	};
+
+	disable_caps(_metadata);
+
+	ASSERT_EQ(0, set_service(&self->srv0, ipv4_tcp, 0));
+	ASSERT_EQ(0, set_service(&self->srv1, ipv4_tcp, 1));
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4_tcp)
+{
+}
+
+TEST_F(ipv4_tcp, port_endianness)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+				      LANDLOCK_ACCESS_NET_CONNECT_TCP,
+	};
+	const struct landlock_net_port_attr bind_host_endian_p0 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		/* Host port format. */
+		.port = self->srv0.port,
+	};
+	const struct landlock_net_port_attr connect_big_endian_p0 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		/* Big endian port format. */
+		.port = htons(self->srv0.port),
+	};
+	const struct landlock_net_port_attr bind_connect_host_endian_p1 = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+				  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+		/* Host port format. */
+		.port = self->srv1.port,
+	};
+	const unsigned int one = 1;
+	const char little_endian = *(const char *)&one;
+	int ruleset_fd;
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &bind_host_endian_p0, 0));
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &connect_big_endian_p0, 0));
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &bind_connect_host_endian_p1, 0));
+	enforce_ruleset(_metadata, ruleset_fd);
+
+	/* No restriction for big endinan CPU. */
+	test_bind_and_connect(_metadata, &self->srv0, false, little_endian);
+
+	/* No restriction for any CPU. */
+	test_bind_and_connect(_metadata, &self->srv1, false, false);
+}
+
+TEST_F(ipv4_tcp, with_fs)
+{
+	const struct landlock_ruleset_attr ruleset_attr_fs_net = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+		.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+	};
+	struct landlock_path_beneath_attr path_beneath = {
+		.allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+		.parent_fd = -1,
+	};
+	struct landlock_net_port_attr tcp_bind = {
+		.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+		.port = self->srv0.port,
+	};
+	int ruleset_fd, bind_fd, dir_fd;
+
+	/* Creates ruleset both for filesystem and network access. */
+	ruleset_fd = landlock_create_ruleset(&ruleset_attr_fs_net,
+					     sizeof(ruleset_attr_fs_net), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	/* Adds a filesystem rule. */
+	path_beneath.parent_fd = open("/dev", O_PATH | O_DIRECTORY | O_CLOEXEC);
+	ASSERT_LE(0, path_beneath.parent_fd);
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+				       &path_beneath, 0));
+	EXPECT_EQ(0, close(path_beneath.parent_fd));
+
+	/* Adds a network rule. */
+	ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+				       &tcp_bind, 0));
+
+	enforce_ruleset(_metadata, ruleset_fd);
+	EXPECT_EQ(0, close(ruleset_fd));
+
+	/* Tests file access. */
+	dir_fd = open("/dev", O_RDONLY);
+	EXPECT_LE(0, dir_fd);
+	EXPECT_EQ(0, close(dir_fd));
+
+	dir_fd = open("/", O_RDONLY);
+	EXPECT_EQ(-1, dir_fd);
+	EXPECT_EQ(EACCES, errno);
+
+	/* Tests port binding. */
+	bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	ASSERT_LE(0, bind_fd);
+	EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+	EXPECT_EQ(0, close(bind_fd));
+
+	bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+	ASSERT_LE(0, bind_fd);
+	EXPECT_EQ(-EACCES, bind_variant(bind_fd, &self->srv1));
+}
+
+FIXTURE(port_specific)
+{
+	struct service_fixture srv0;
+};
+
+FIXTURE_VARIANT(port_specific)
+{
+	const enum sandbox_type sandbox;
+	const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+	},
+};
+
+FIXTURE_SETUP(port_specific)
+{
+	disable_caps(_metadata);
+
+	ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+
+	setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(port_specific)
+{
+}
+
+TEST_F(port_specific, bind_connect_zero)
+{
+	int bind_fd, connect_fd, ret;
+	uint16_t port;
+
+	/* Adds a rule layer with bind and connect actions. */
+	if (variant->sandbox == TCP_SANDBOX) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP
+		};
+		const struct landlock_net_port_attr tcp_bind_connect_zero = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = 0,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		/* Checks zero port value on bind and connect actions. */
+		EXPECT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_zero, 0));
+
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+
+	connect_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, connect_fd);
+
+	/* Sets address port to 0 for both protocol families. */
+	set_port(&self->srv0, 0);
+	/*
+	 * Binds on port 0, which selects a random port within
+	 * ip_local_port_range.
+	 */
+	ret = bind_variant(bind_fd, &self->srv0);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(0, listen(bind_fd, backlog));
+
+	/* Connects on port 0. */
+	ret = connect_variant(connect_fd, &self->srv0);
+	EXPECT_EQ(-ECONNREFUSED, ret);
+
+	/* Sets binded port for both protocol families. */
+	port = get_binded_port(bind_fd, &variant->prot);
+	EXPECT_NE(0, port);
+	set_port(&self->srv0, port);
+	/* Connects on the binded port. */
+	ret = connect_variant(connect_fd, &self->srv0);
+	if (is_restricted(&variant->prot, variant->sandbox)) {
+		/* Denied by Landlock. */
+		EXPECT_EQ(-EACCES, ret);
+	} else {
+		EXPECT_EQ(0, ret);
+	}
+
+	EXPECT_EQ(0, close(connect_fd));
+	EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(port_specific, bind_connect_1023)
+{
+	int bind_fd, connect_fd, ret;
+
+	/* Adds a rule layer with bind and connect actions. */
+	if (variant->sandbox == TCP_SANDBOX) {
+		const struct landlock_ruleset_attr ruleset_attr = {
+			.handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+					      LANDLOCK_ACCESS_NET_CONNECT_TCP
+		};
+		/* A rule with port value less than 1024. */
+		const struct landlock_net_port_attr tcp_bind_connect_low_range = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = 1023,
+		};
+		/* A rule with 1024 port. */
+		const struct landlock_net_port_attr tcp_bind_connect = {
+			.allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+					  LANDLOCK_ACCESS_NET_CONNECT_TCP,
+			.port = 1024,
+		};
+		int ruleset_fd;
+
+		ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+						     sizeof(ruleset_attr), 0);
+		ASSERT_LE(0, ruleset_fd);
+
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect_low_range, 0));
+		ASSERT_EQ(0,
+			  landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+					    &tcp_bind_connect, 0));
+
+		enforce_ruleset(_metadata, ruleset_fd);
+		EXPECT_EQ(0, close(ruleset_fd));
+	}
+
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+
+	connect_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, connect_fd);
+
+	/* Sets address port to 1023 for both protocol families. */
+	set_port(&self->srv0, 1023);
+	/* Binds on port 1023. */
+	ret = bind_variant(bind_fd, &self->srv0);
+	/* Denied by the system. */
+	EXPECT_EQ(-EACCES, ret);
+
+	/* Binds on port 1023. */
+	set_cap(_metadata, CAP_NET_BIND_SERVICE);
+	ret = bind_variant(bind_fd, &self->srv0);
+	clear_cap(_metadata, CAP_NET_BIND_SERVICE);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, listen(bind_fd, backlog));
+
+	/* Connects on the binded port 1023. */
+	ret = connect_variant(connect_fd, &self->srv0);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(0, close(connect_fd));
+	EXPECT_EQ(0, close(bind_fd));
+
+	bind_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, bind_fd);
+
+	connect_fd = socket_variant(&self->srv0);
+	ASSERT_LE(0, connect_fd);
+
+	/* Sets address port to 1024 for both protocol families. */
+	set_port(&self->srv0, 1024);
+	/* Binds on port 1024. */
+	ret = bind_variant(bind_fd, &self->srv0);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, listen(bind_fd, backlog));
+
+	/* Connects on the binded port 1024. */
+	ret = connect_variant(connect_fd, &self->srv0);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(0, close(connect_fd));
+	EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 5d52f64dfb43..7afe05e8c4d7 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -9,7 +9,6 @@ CONFIG_INIT_ON_FREE_DEFAULT_ON=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 CONFIG_UBSAN=y
 CONFIG_UBSAN_BOUNDS=y
-CONFIG_UBSAN_TRAP=y
 CONFIG_STACKPROTECTOR_STRONG=y
 CONFIG_SLUB_DEBUG=y
 CONFIG_SLUB_DEBUG_ON=y
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 607b8d7e3ea3..368973f05250 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,4 +1,5 @@
 #PANIC
+#PANIC_STOP_IRQOFF Crashes entire system
 BUG kernel BUG at
 WARNING WARNING:
 WARNING_MESSAGE message trigger
@@ -7,7 +8,7 @@ EXCEPTION
 #EXHAUST_STACK Corrupts memory on failure
 #CORRUPT_STACK Crashes entire system on success
 #CORRUPT_STACK_STRONG Crashes entire system on success
-ARRAY_BOUNDS
+ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
 CORRUPT_LIST_ADD list_add corruption
 CORRUPT_LIST_DEL list_del corruption
 STACK_GUARD_PAGE_LEADING
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index cdc9ce4426b9..4ff10ea61461 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -18,6 +18,8 @@ mremap_dontunmap
 mremap_test
 on-fault-limit
 transhuge-stress
+pagemap_ioctl
+*.tmp*
 protection_keys
 protection_keys_32
 protection_keys_64
@@ -43,3 +45,4 @@ mdwe_test
 gup_longterm
 mkdirty
 va_high_addr_switch
+hugetlb_fault_after_madv
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 6a9fc5693145..78dfec8bc676 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,7 +33,7 @@ endif
 MAKEFLAGS += --no-builtin-rules
 
 CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
-LDLIBS = -lrt -lpthread
+LDLIBS = -lrt -lpthread -lm
 
 TEST_GEN_FILES = cow
 TEST_GEN_FILES += compaction_test
@@ -60,6 +60,7 @@ TEST_GEN_FILES += mrelease_test
 TEST_GEN_FILES += mremap_dontunmap
 TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += on-fault-limit
+TEST_GEN_PROGS += pagemap_ioctl
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += uffd-stress
@@ -68,6 +69,7 @@ TEST_GEN_FILES += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
 TEST_GEN_FILES += ksm_functional_tests
 TEST_GEN_FILES += mdwe_test
+TEST_GEN_FILES += hugetlb_fault_after_madv
 
 ifneq ($(ARCH),arm64)
 TEST_GEN_PROGS += soft-dirty
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index be087c4bc396..4309916f629e 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -1,5 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_USERFAULTFD=y
+CONFIG_PTE_MARKER_UFFD_WP=y
 CONFIG_TEST_VMALLOC=m
 CONFIG_DEVICE_PRIVATE=y
 CONFIG_TEST_HMM=m
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index d33d3e68ffab..ad168d35b23b 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -265,10 +265,11 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
 	fd = fileno(file);
 	if (fd < 0) {
 		ksft_test_result_fail("fileno() failed\n");
-		return;
+		goto close;
 	}
 
 	fn(fd, pagesize);
+close:
 	fclose(file);
 }
 
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index d55322df4b73..f32d99565c5e 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -36,25 +36,6 @@
 unsigned long huge_page_size;
 unsigned long base_page_size;
 
-unsigned long get_free_hugepages(void)
-{
-	unsigned long fhp = 0;
-	char *line = NULL;
-	size_t linelen = 0;
-	FILE *f = fopen("/proc/meminfo", "r");
-
-	if (!f)
-		return fhp;
-	while (getline(&line, &linelen, f) > 0) {
-		if (sscanf(line, "HugePages_Free:      %lu", &fhp) == 1)
-			break;
-	}
-
-	free(line);
-	fclose(f);
-	return fhp;
-}
-
 void write_fault_pages(void *addr, unsigned long nr_pages)
 {
 	unsigned long i;
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
new file mode 100644
index 000000000000..73b81c632366
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MMAP_SIZE (1 << 21)
+#define INLOOP_ITER 100
+
+char *huge_ptr;
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+	char *ptr = (char *)huge_ptr;
+
+	for (int i = 0; i < INLOOP_ITER; i++)
+		ptr[0] = '.';
+
+	return NULL;
+}
+
+void *madv(void *unused)
+{
+	usleep(rand() % 10);
+
+	for (int i = 0; i < INLOOP_ITER; i++)
+		madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+
+	return NULL;
+}
+
+int main(void)
+{
+	unsigned long free_hugepages;
+	pthread_t thread1, thread2;
+	/*
+	 * On kernel 6.4, we are able to reproduce the problem with ~1000
+	 * interactions
+	 */
+	int max = 10000;
+
+	srand(getpid());
+
+	free_hugepages = get_free_hugepages();
+	if (free_hugepages != 1) {
+		ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+			       free_hugepages);
+	}
+
+	while (max--) {
+		huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+				-1, 0);
+
+		if ((unsigned long)huge_ptr == -1)
+			ksft_exit_skip("Failed to allocated huge page\n");
+
+		pthread_create(&thread1, NULL, madv, NULL);
+		pthread_create(&thread2, NULL, touch, NULL);
+
+		pthread_join(thread1, NULL);
+		pthread_join(thread2, NULL);
+		munmap(huge_ptr, MMAP_SIZE);
+	}
+
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 901e950f9138..fbff0dd09191 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -26,6 +26,7 @@
 
 #define KiB 1024u
 #define MiB (1024 * KiB)
+#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
 
 static int mem_fd;
 static int ksm_fd;
@@ -479,6 +480,64 @@ static void test_prctl_fork(void)
 	ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
 }
 
+static int ksm_fork_exec_child(void)
+{
+	/* Test if KSM is enabled for the process. */
+	return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
+}
+
+static void test_prctl_fork_exec(void)
+{
+	int ret, status;
+	pid_t child_pid;
+
+	ksft_print_msg("[RUN] %s\n", __func__);
+
+	ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+	if (ret < 0 && errno == EINVAL) {
+		ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+		return;
+	} else if (ret) {
+		ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+		return;
+	}
+
+	child_pid = fork();
+	if (child_pid == -1) {
+		ksft_test_result_skip("fork() failed\n");
+		return;
+	} else if (child_pid == 0) {
+		char *prg_name = "./ksm_functional_tests";
+		char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
+
+		execv(prg_name, argv_for_program);
+		return;
+	}
+
+	if (waitpid(child_pid, &status, 0) > 0) {
+		if (WIFEXITED(status)) {
+			status = WEXITSTATUS(status);
+			if (status) {
+				ksft_test_result_fail("KSM not enabled\n");
+				return;
+			}
+		} else {
+			ksft_test_result_fail("program didn't terminate normally\n");
+			return;
+		}
+	} else {
+		ksft_test_result_fail("waitpid() failed\n");
+		return;
+	}
+
+	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+		ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+		return;
+	}
+
+	ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
 static void test_prctl_unmerge(void)
 {
 	const unsigned int size = 2 * MiB;
@@ -536,9 +595,13 @@ unmap:
 
 int main(int argc, char **argv)
 {
-	unsigned int tests = 7;
+	unsigned int tests = 8;
 	int err;
 
+	if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+		exit(ksm_fork_exec_child() == 1 ? 0 : 1);
+	}
+
 #ifdef __NR_userfaultfd
 	tests++;
 #endif
@@ -576,6 +639,7 @@ int main(int argc, char **argv)
 
 	test_prctl();
 	test_prctl_fork();
+	test_prctl_fork_exec();
 	test_prctl_unmerge();
 
 	err = ksft_get_fail_cnt();
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index bc91bef5d254..200bedcdc32e 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -22,15 +22,104 @@
 
 TEST(prctl_flags)
 {
+	EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0);
+	EXPECT_EQ(errno, EINVAL);
+
 	EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0);
+	EXPECT_EQ(errno, EINVAL);
 
 	EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0);
+	EXPECT_EQ(errno, EINVAL);
 	EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(consecutive_prctl_flags) {};
+FIXTURE_SETUP(consecutive_prctl_flags) {}
+FIXTURE_TEARDOWN(consecutive_prctl_flags) {}
+
+FIXTURE_VARIANT(consecutive_prctl_flags)
+{
+	unsigned long first_flags;
+	unsigned long second_flags;
+	bool should_work;
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags)
+{
+	.first_flags = 0,
+	.second_flags = 0,
+	.should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+	.second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+	.should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+	.second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+	.should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+	.second_flags = 0,
+	.should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+	.second_flags = 0,
+	.should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+	.second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+	.should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit)
+{
+	.first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+	.second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+	.should_work = false,
+};
+
+TEST_F(consecutive_prctl_flags, two_prctls)
+{
+	int ret;
+
+	EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0);
+
+	ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L);
+	if (variant->should_work) {
+		EXPECT_EQ(ret, 0);
+
+		ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+		ASSERT_EQ(ret, variant->second_flags);
+	} else {
+		EXPECT_NE(ret, 0);
+		ASSERT_EQ(errno, EPERM);
+	}
 }
 
 FIXTURE(mdwe)
@@ -45,28 +134,45 @@ FIXTURE_VARIANT(mdwe)
 {
 	bool enabled;
 	bool forked;
+	bool inherit;
 };
 
 FIXTURE_VARIANT_ADD(mdwe, stock)
 {
-        .enabled = false,
+	.enabled = false,
 	.forked = false,
+	.inherit = false,
 };
 
 FIXTURE_VARIANT_ADD(mdwe, enabled)
 {
-        .enabled = true,
+	.enabled = true,
 	.forked = false,
+	.inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, inherited)
+{
+	.enabled = true,
+	.forked = true,
+	.inherit = true,
 };
 
-FIXTURE_VARIANT_ADD(mdwe, forked)
+FIXTURE_VARIANT_ADD(mdwe, not_inherited)
 {
-        .enabled = true,
+	.enabled = true,
 	.forked = true,
+	.inherit = false,
 };
 
+static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant)
+{
+	return variant->enabled && (!variant->forked || variant->inherit);
+}
+
 FIXTURE_SETUP(mdwe)
 {
+	unsigned long mdwe_flags;
 	int ret, status;
 
 	self->p = NULL;
@@ -76,13 +182,17 @@ FIXTURE_SETUP(mdwe)
 	if (!variant->enabled)
 		return;
 
-	ret = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0L, 0L, 0L);
+	mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN;
+	if (!variant->inherit)
+		mdwe_flags |= PR_MDWE_NO_INHERIT;
+
+	ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L);
 	ASSERT_EQ(ret, 0) {
 		TH_LOG("PR_SET_MDWE failed or unsupported");
 	}
 
 	ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
-	ASSERT_EQ(ret, 1);
+	ASSERT_EQ(ret, mdwe_flags);
 
 	if (variant->forked) {
 		self->pid = fork();
@@ -113,7 +223,7 @@ TEST_F(mdwe, mmap_READ_EXEC)
 TEST_F(mdwe, mmap_WRITE_EXEC)
 {
 	self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0);
-	if (variant->enabled) {
+	if (executable_map_should_fail(variant)) {
 		EXPECT_EQ(self->p, MAP_FAILED);
 	} else {
 		EXPECT_NE(self->p, MAP_FAILED);
@@ -139,7 +249,7 @@ TEST_F(mdwe, mprotect_add_EXEC)
 	ASSERT_NE(self->p, MAP_FAILED);
 
 	ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
-	if (variant->enabled) {
+	if (executable_map_should_fail(variant)) {
 		EXPECT_LT(ret, 0);
 	} else {
 		EXPECT_EQ(ret, 0);
@@ -154,7 +264,7 @@ TEST_F(mdwe, mprotect_WRITE_EXEC)
 	ASSERT_NE(self->p, MAP_FAILED);
 
 	ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC);
-	if (variant->enabled) {
+	if (executable_map_should_fail(variant)) {
 		EXPECT_LT(ret, 0);
 	} else {
 		EXPECT_EQ(ret, 0);
@@ -168,13 +278,10 @@ TEST_F(mdwe, mmap_FIXED)
 	self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
 	ASSERT_NE(self->p, MAP_FAILED);
 
-	p = mmap(self->p + self->size, self->size, PROT_READ | PROT_EXEC,
+	/* MAP_FIXED unmaps the existing page before mapping which is allowed */
+	p = mmap(self->p, self->size, PROT_READ | PROT_EXEC,
 		 self->flags | MAP_FIXED, 0, 0);
-	if (variant->enabled) {
-		EXPECT_EQ(p, MAP_FAILED);
-	} else {
-		EXPECT_EQ(p, self->p);
-	}
+	EXPECT_EQ(p, self->p);
 }
 
 TEST_F(mdwe, arm64_BTI)
diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index ca2359835e75..a06e73ec8568 100644
--- a/tools/testing/selftests/mm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -7,6 +7,7 @@
  */
 #define _GNU_SOURCE
 #include <sys/mman.h>
+#include <linux/mman.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5c3773de9f0f..1d4c1589c305 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -23,12 +23,15 @@
 #define VALIDATION_NO_THRESHOLD 0	/* Verify the entire region */
 
 #define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
+#define SIZE_KB(k) ((size_t)k * 1024)
 
 struct config {
 	unsigned long long src_alignment;
 	unsigned long long dest_alignment;
 	unsigned long long region_size;
 	int overlapping;
+	int dest_preamble_size;
 };
 
 struct test {
@@ -44,6 +47,7 @@ enum {
 	_1MB = 1ULL << 20,
 	_2MB = 2ULL << 20,
 	_4MB = 4ULL << 20,
+	_5MB = 5ULL << 20,
 	_1GB = 1ULL << 30,
 	_2GB = 2ULL << 30,
 	PMD = _2MB,
@@ -146,6 +150,60 @@ static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
 }
 
 /*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+	unsigned long long addr = 0ULL;
+	void *src_addr = NULL;
+	unsigned long long mmap_min_addr;
+
+	mmap_min_addr = get_mmap_min_addr();
+	/*
+	 * For some tests, we need to not have any mappings below the
+	 * source mapping. Add some headroom to mmap_min_addr for this.
+	 */
+	mmap_min_addr += 10 * _4MB;
+
+retry:
+	addr += c.src_alignment;
+	if (addr < mmap_min_addr)
+		goto retry;
+
+	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+					MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+					-1, 0);
+	if (src_addr == MAP_FAILED) {
+		if (errno == EPERM || errno == EEXIST)
+			goto retry;
+		goto error;
+	}
+	/*
+	 * Check that the address is aligned to the specified alignment.
+	 * Addresses which have alignments that are multiples of that
+	 * specified are not considered valid. For instance, 1GB address is
+	 * 2MB-aligned, however it will not be considered valid for a
+	 * requested alignment of 2MB. This is done to reduce coincidental
+	 * alignment in the tests.
+	 */
+	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+			!((unsigned long long) src_addr & c.src_alignment)) {
+		munmap(src_addr, c.region_size);
+		goto retry;
+	}
+
+	if (!src_addr)
+		goto error;
+
+	return src_addr;
+error:
+	ksft_print_msg("Failed to map source region: %s\n",
+			strerror(errno));
+	return NULL;
+}
+
+/*
  * This test validates that merge is called when expanding a mapping.
  * Mapping containing three pages is created, middle page is unmapped
  * and then the mapping containing the first page is expanded so that
@@ -225,59 +283,83 @@ out:
 }
 
 /*
- * Returns the start address of the mapping on success, else returns
- * NULL on failure.
+ * Verify that an mremap within a range does not cause corruption
+ * of unrelated part of range.
+ *
+ * Consider the following range which is 2MB aligned and is
+ * a part of a larger 20MB range which is not shown. Each
+ * character is 256KB below making the source and destination
+ * 2MB each. The lower case letters are moved (s to d) and the
+ * upper case letters are not moved. The below test verifies
+ * that the upper case S letters are not corrupted by the
+ * adjacent mremap.
+ *
+ * |DDDDddddSSSSssss|
  */
-static void *get_source_mapping(struct config c)
+static void mremap_move_within_range(char pattern_seed)
 {
-	unsigned long long addr = 0ULL;
-	void *src_addr = NULL;
-	unsigned long long mmap_min_addr;
+	char *test_name = "mremap mremap move within range";
+	void *src, *dest;
+	int i, success = 1;
+
+	size_t size = SIZE_MB(20);
+	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (ptr == MAP_FAILED) {
+		perror("mmap");
+		success = 0;
+		goto out;
+	}
+	memset(ptr, 0, size);
 
-	mmap_min_addr = get_mmap_min_addr();
+	src = ptr + SIZE_MB(6);
+	src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
 
-retry:
-	addr += c.src_alignment;
-	if (addr < mmap_min_addr)
-		goto retry;
+	/* Set byte pattern for source block. */
+	srand(pattern_seed);
+	for (i = 0; i < SIZE_MB(2); i++) {
+		((char *)src)[i] = (char) rand();
+	}
 
-	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
-					MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
-					-1, 0);
-	if (src_addr == MAP_FAILED) {
-		if (errno == EPERM || errno == EEXIST)
-			goto retry;
-		goto error;
+	dest = src - SIZE_MB(2);
+
+	void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+						   MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+	if (new_ptr == MAP_FAILED) {
+		perror("mremap");
+		success = 0;
+		goto out;
 	}
-	/*
-	 * Check that the address is aligned to the specified alignment.
-	 * Addresses which have alignments that are multiples of that
-	 * specified are not considered valid. For instance, 1GB address is
-	 * 2MB-aligned, however it will not be considered valid for a
-	 * requested alignment of 2MB. This is done to reduce coincidental
-	 * alignment in the tests.
-	 */
-	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
-			!((unsigned long long) src_addr & c.src_alignment)) {
-		munmap(src_addr, c.region_size);
-		goto retry;
+
+	/* Verify byte pattern after remapping */
+	srand(pattern_seed);
+	for (i = 0; i < SIZE_MB(1); i++) {
+		char c = (char) rand();
+
+		if (((char *)src)[i] != c) {
+			ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+				       i);
+			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+					((char *) src)[i] & 0xff);
+			success = 0;
+		}
 	}
 
-	if (!src_addr)
-		goto error;
+out:
+	if (munmap(ptr, size) == -1)
+		perror("munmap");
 
-	return src_addr;
-error:
-	ksft_print_msg("Failed to map source region: %s\n",
-			strerror(errno));
-	return NULL;
+	if (success)
+		ksft_test_result_pass("%s\n", test_name);
+	else
+		ksft_test_result_fail("%s\n", test_name);
 }
 
 /* Returns the time taken for the remap on success else returns -1. */
 static long long remap_region(struct config c, unsigned int threshold_mb,
 			      char pattern_seed)
 {
-	void *addr, *src_addr, *dest_addr;
+	void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
 	unsigned long long i;
 	struct timespec t_start = {0, 0}, t_end = {0, 0};
 	long long  start_ns, end_ns, align_mask, ret, offset;
@@ -294,7 +376,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 		goto out;
 	}
 
-	/* Set byte pattern */
+	/* Set byte pattern for source block. */
 	srand(pattern_seed);
 	for (i = 0; i < threshold; i++)
 		memset((char *) src_addr + i, (char) rand(), 1);
@@ -306,6 +388,9 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 	addr = (void *) (((unsigned long long) src_addr + c.region_size
 			  + offset) & align_mask);
 
+	/* Remap after the destination block preamble. */
+	addr += c.dest_preamble_size;
+
 	/* See comment in get_source_mapping() */
 	if (!((unsigned long long) addr & c.dest_alignment))
 		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
@@ -316,11 +401,29 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 		if (addr + c.dest_alignment < addr) {
 			ksft_print_msg("Couldn't find a valid region to remap to\n");
 			ret = -1;
-			goto out;
+			goto clean_up_src;
 		}
 		addr += c.dest_alignment;
 	}
 
+	if (c.dest_preamble_size) {
+		dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size,
+					  PROT_READ | PROT_WRITE,
+					  MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+							-1, 0);
+		if (dest_preamble_addr == MAP_FAILED) {
+			ksft_print_msg("Failed to map dest preamble region: %s\n",
+					strerror(errno));
+			ret = -1;
+			goto clean_up_src;
+		}
+
+		/* Set byte pattern for the dest preamble block. */
+		srand(pattern_seed);
+		for (i = 0; i < c.dest_preamble_size; i++)
+			memset((char *) dest_preamble_addr + i, (char) rand(), 1);
+	}
+
 	clock_gettime(CLOCK_MONOTONIC, &t_start);
 	dest_addr = mremap(src_addr, c.region_size, c.region_size,
 					  MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
@@ -329,7 +432,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 	if (dest_addr == MAP_FAILED) {
 		ksft_print_msg("mremap failed: %s\n", strerror(errno));
 		ret = -1;
-		goto clean_up_src;
+		goto clean_up_dest_preamble;
 	}
 
 	/* Verify byte pattern after remapping */
@@ -338,7 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 		char c = (char) rand();
 
 		if (((char *) dest_addr)[i] != c) {
-			ksft_print_msg("Data after remap doesn't match at offset %d\n",
+			ksft_print_msg("Data after remap doesn't match at offset %llu\n",
 				       i);
 			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
 					((char *) dest_addr)[i] & 0xff);
@@ -347,6 +450,23 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
 		}
 	}
 
+	/* Verify the dest preamble byte pattern after remapping */
+	if (c.dest_preamble_size) {
+		srand(pattern_seed);
+		for (i = 0; i < c.dest_preamble_size; i++) {
+			char c = (char) rand();
+
+			if (((char *) dest_preamble_addr)[i] != c) {
+				ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
+					       i);
+				ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+					       ((char *) dest_preamble_addr)[i] & 0xff);
+				ret = -1;
+				goto clean_up_dest;
+			}
+		}
+	}
+
 	start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
 	end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
 	ret = end_ns - start_ns;
@@ -359,12 +479,92 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
  */
 clean_up_dest:
 	munmap(dest_addr, c.region_size);
+clean_up_dest_preamble:
+	if (c.dest_preamble_size && dest_preamble_addr)
+		munmap(dest_preamble_addr, c.dest_preamble_size);
 clean_up_src:
 	munmap(src_addr, c.region_size);
 out:
 	return ret;
 }
 
+/*
+ * Verify that an mremap aligning down does not destroy
+ * the beginning of the mapping just because the aligned
+ * down address landed on a mapping that maybe does not exist.
+ */
+static void mremap_move_1mb_from_start(char pattern_seed)
+{
+	char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
+	void *src = NULL, *dest = NULL;
+	int i, success = 1;
+
+	/* Config to reuse get_source_mapping() to do an aligned mmap. */
+	struct config c = {
+		.src_alignment = SIZE_MB(1) + SIZE_KB(256),
+		.region_size = SIZE_MB(6)
+	};
+
+	src = get_source_mapping(c);
+	if (!src) {
+		success = 0;
+		goto out;
+	}
+
+	c.src_alignment = SIZE_MB(1) + SIZE_KB(256);
+	dest = get_source_mapping(c);
+	if (!dest) {
+		success = 0;
+		goto out;
+	}
+
+	/* Set byte pattern for source block. */
+	srand(pattern_seed);
+	for (i = 0; i < SIZE_MB(2); i++) {
+		((char *)src)[i] = (char) rand();
+	}
+
+	/*
+	 * Unmap the beginning of dest so that the aligned address
+	 * falls on no mapping.
+	 */
+	munmap(dest, SIZE_MB(1));
+
+	void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+						   MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+	if (new_ptr == MAP_FAILED) {
+		perror("mremap");
+		success = 0;
+		goto out;
+	}
+
+	/* Verify byte pattern after remapping */
+	srand(pattern_seed);
+	for (i = 0; i < SIZE_MB(1); i++) {
+		char c = (char) rand();
+
+		if (((char *)src)[i] != c) {
+			ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+				       i);
+			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+					((char *) src)[i] & 0xff);
+			success = 0;
+		}
+	}
+
+out:
+	if (src && munmap(src, c.region_size) == -1)
+		perror("munmap src");
+
+	if (dest && munmap(dest, c.region_size) == -1)
+		perror("munmap dest");
+
+	if (success)
+		ksft_test_result_pass("%s\n", test_name);
+	else
+		ksft_test_result_fail("%s\n", test_name);
+}
+
 static void run_mremap_test_case(struct test test_case, int *failures,
 				 unsigned int threshold_mb,
 				 unsigned int pattern_seed)
@@ -434,7 +634,7 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
 	return 0;
 }
 
-#define MAX_TEST 13
+#define MAX_TEST 15
 #define MAX_PERF_TEST 3
 int main(int argc, char **argv)
 {
@@ -443,7 +643,8 @@ int main(int argc, char **argv)
 	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
 	unsigned int pattern_seed;
 	int num_expand_tests = 2;
-	struct test test_cases[MAX_TEST];
+	int num_misc_tests = 2;
+	struct test test_cases[MAX_TEST] = {};
 	struct test perf_test_cases[MAX_PERF_TEST];
 	int page_size;
 	time_t t;
@@ -500,6 +701,15 @@ int main(int argc, char **argv)
 	test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
 				   "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
 
+	/* Src and Dest addr 1MB aligned. 5MB mremap. */
+	test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+	/* Src and Dest addr 1MB aligned. 5MB mremap. */
+	test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+				  "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble");
+	test_cases[14].config.dest_preamble_size = 10 * _4MB;
+
 	perf_test_cases[0] =  MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
 					"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
 	/*
@@ -515,7 +725,7 @@ int main(int argc, char **argv)
 				(threshold_mb * _1MB >= _1GB);
 
 	ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
-		      ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
+		      ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests);
 
 	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
 		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
@@ -533,6 +743,9 @@ int main(int argc, char **argv)
 
 	fclose(maps_fp);
 
+	mremap_move_within_range(pattern_seed);
+	mremap_move_1mb_from_start(pattern_seed);
+
 	if (run_perf_tests) {
 		ksft_print_msg("\n%s\n",
 		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
new file mode 100644
index 000000000000..befab43719ba
--- /dev/null
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -0,0 +1,1661 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <malloc.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+#include <linux/types.h>
+#include <linux/memfd.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <asm/unistd.h>
+#include <pthread.h>
+#include <sys/resource.h>
+#include <assert.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#define PAGEMAP_BITS_ALL		(PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN |	\
+					 PAGE_IS_FILE | PAGE_IS_PRESENT |	\
+					 PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |	\
+					 PAGE_IS_HUGE)
+#define PAGEMAP_NON_WRITTEN_BITS	(PAGE_IS_WPALLOWED | PAGE_IS_FILE |	\
+					 PAGE_IS_PRESENT | PAGE_IS_SWAPPED |	\
+					 PAGE_IS_PFNZERO | PAGE_IS_HUGE)
+
+#define TEST_ITERATIONS 100
+#define PAGEMAP "/proc/self/pagemap"
+int pagemap_fd;
+int uffd;
+int page_size;
+int hpage_size;
+
+#define LEN(region)	((region.end - region.start)/page_size)
+
+static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag,
+			  int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+			  long return_mask)
+{
+	struct pm_scan_arg arg;
+
+	arg.start = (uintptr_t)start;
+	arg.end = (uintptr_t)(start + len);
+	arg.vec = (uintptr_t)vec;
+	arg.vec_len = vec_len;
+	arg.flags = flag;
+	arg.size = sizeof(struct pm_scan_arg);
+	arg.max_pages = max_pages;
+	arg.category_mask = required_mask;
+	arg.category_anyof_mask = anyof_mask;
+	arg.category_inverted = excluded_mask;
+	arg.return_mask = return_mask;
+
+	return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag,
+			int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+			long return_mask, long *walk_end)
+{
+	struct pm_scan_arg arg;
+	int ret;
+
+	arg.start = (uintptr_t)start;
+	arg.end = (uintptr_t)(start + len);
+	arg.vec = (uintptr_t)vec;
+	arg.vec_len = vec_len;
+	arg.flags = flag;
+	arg.size = sizeof(struct pm_scan_arg);
+	arg.max_pages = max_pages;
+	arg.category_mask = required_mask;
+	arg.category_anyof_mask = anyof_mask;
+	arg.category_inverted = excluded_mask;
+	arg.return_mask = return_mask;
+
+	ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+
+	if (walk_end)
+		*walk_end = arg.walk_end;
+
+	return ret;
+}
+
+
+int init_uffd(void)
+{
+	struct uffdio_api uffdio_api;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+	if (uffd == -1)
+		return uffd;
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
+			      UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+		return -1;
+
+	if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
+	    !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
+	    !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
+	    !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+		return -1;
+
+	return 0;
+}
+
+int wp_init(void *lpBaseAddress, int dwRegionSize)
+{
+	struct uffdio_register uffdio_register;
+	struct uffdio_writeprotect wp;
+
+	uffdio_register.range.start = (unsigned long)lpBaseAddress;
+	uffdio_register.range.len = dwRegionSize;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno));
+
+	if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT))
+		ksft_exit_fail_msg("ioctl set is incorrect\n");
+
+	wp.range.start = (unsigned long)lpBaseAddress;
+	wp.range.len = dwRegionSize;
+	wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+
+	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
+		ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n");
+
+	return 0;
+}
+
+int wp_free(void *lpBaseAddress, int dwRegionSize)
+{
+	struct uffdio_register uffdio_register;
+
+	uffdio_register.range.start = (unsigned long)lpBaseAddress;
+	uffdio_register.range.len = dwRegionSize;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+	if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+		ksft_exit_fail_msg("ioctl unregister failure\n");
+	return 0;
+}
+
+int wp_addr_range(void *lpBaseAddress, int dwRegionSize)
+{
+	if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0,
+			  PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno));
+
+	return 0;
+}
+
+void *gethugetlb_mem(int size, int *shmid)
+{
+	char *mem;
+
+	if (shmid) {
+		*shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+		if (*shmid < 0)
+			return NULL;
+
+		mem = shmat(*shmid, 0, 0);
+		if (mem == (char *)-1) {
+			shmctl(*shmid, IPC_RMID, NULL);
+			ksft_exit_fail_msg("Shared memory attach failure\n");
+		}
+	} else {
+		mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0);
+		if (mem == MAP_FAILED)
+			return NULL;
+	}
+
+	return mem;
+}
+
+int userfaultfd_tests(void)
+{
+	int mem_size, vec_size, written, num_pages = 16;
+	char *mem, *vec;
+
+	mem_size = num_pages * page_size;
+	mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+
+	/* Change protection of pages differently */
+	mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE);
+	mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ);
+	mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+	mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ);
+	mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+	mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE);
+	mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+	mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ);
+
+	wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8));
+	wp_addr_range(mem, mem_size);
+
+	vec_size = mem_size/page_size;
+	vec = malloc(sizeof(struct page_region) * vec_size);
+
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
+
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+	free(vec);
+	return 0;
+}
+
+int get_reads(struct page_region *vec, int vec_size)
+{
+	int i, sum = 0;
+
+	for (i = 0; i < vec_size; i++)
+		sum += LEN(vec[i]);
+
+	return sum;
+}
+
+int sanity_tests_sd(void)
+{
+	int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0;
+	int total_writes, total_reads, reads, count;
+	struct page_region *vec, *vec2;
+	char *mem, *m[2];
+	long walk_end;
+
+	vec_size = num_pages/2;
+	mem_size = num_pages * page_size;
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec)
+		ksft_exit_fail_msg("error nomem\n");
+
+	vec2 = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec2)
+		ksft_exit_fail_msg("error nomem\n");
+
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	/* 1. wrong operation */
+	ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0,
+				       0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+			 "%s Zero range size is valid\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0,
+				       0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0,
+			 "%s output buffer must be specified with size\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0,
+				       0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+			 "%s output buffer can be 0\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0,
+				       0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+			 "%s output buffer can be 0\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1,
+				       0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+			 "%s wrong flag specified\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+				       PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF,
+				       0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+			 "%s flag has extra bits specified\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+				       0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0,
+			 "%s no selection mask is specified\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+				       0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0,
+			 "%s no return mask is specified\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+				       0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0,
+			 "%s wrong return mask specified\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+				       PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				       0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0,
+			 "%s mixture of correct and wrong flag\n", __func__);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+				       PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				       0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0,
+			 "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n",
+			 __func__);
+
+	/* 2. Clear area with larger vec size */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+			    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__);
+
+	/* 3. Repeated pattern of written and non-written pages */
+	for (i = 0; i < mem_size; i += 2 * page_size)
+		mem[i]++;
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
+			    0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ksft_test_result(ret == mem_size/(page_size * 2),
+			 "%s Repeated pattern of written and non-written pages\n", __func__);
+
+	/* 4. Repeated pattern of written and non-written pages in parts */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+			     PAGE_IS_WRITTEN);
+	if (ret2 < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+	ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			     PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			     0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret3 < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno));
+
+	ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
+			 "%s Repeated pattern of written and non-written pages in parts %d %d %d\n",
+			 __func__, ret, ret3, ret2);
+
+	/* 5. Repeated pattern of written and non-written pages max_pages */
+	for (i = 0; i < mem_size; i += 2 * page_size)
+		mem[i]++;
+	mem[(mem_size/page_size - 1) * page_size]++;
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			     PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			     0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret2 < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+	ksft_test_result(ret == num_pages/2 && ret2 == 1,
+			 "%s Repeated pattern of written and non-written pages max_pages\n",
+			 __func__);
+
+	/* 6. only get 2 dirty pages and clear them as well */
+	vec_size = mem_size/page_size;
+	memset(mem, -1, mem_size);
+
+	/* get and clear second and third pages */
+	ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
+			      PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret2 < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+	ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
+			 vec[0].start == (uintptr_t)(mem + page_size) &&
+			 ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem &&
+			 LEN(vec2[1]) == vec_size - 3 &&
+			 vec2[1].start == (uintptr_t)(mem + 3 * page_size),
+			 "%s only get 2 written pages and clear them as well\n", __func__);
+
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 7. Two regions */
+	m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (m[0] == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (m[1] == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(m[0], mem_size);
+	wp_init(m[1], mem_size);
+	wp_addr_range(m[0], mem_size);
+	wp_addr_range(m[1], mem_size);
+
+	memset(m[0], 'a', mem_size);
+	memset(m[1], 'b', mem_size);
+
+	wp_addr_range(m[0], mem_size);
+
+	ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+			    PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
+			 "%s Two regions\n", __func__);
+
+	wp_free(m[0], mem_size);
+	wp_free(m[1], mem_size);
+	munmap(m[0], mem_size);
+	munmap(m[1], mem_size);
+
+	free(vec);
+	free(vec2);
+
+	/* 8. Smaller vec */
+	mem_size = 1050 * page_size;
+	vec_size = mem_size/(page_size*2);
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec)
+		ksft_exit_fail_msg("error nomem\n");
+
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+			    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	for (i = 0; i < mem_size/page_size; i += 2)
+		mem[i * page_size]++;
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	total_pages += ret;
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	total_pages += ret;
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	total_pages += ret;
+
+	ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__);
+
+	free(vec);
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+	total_pages = 0;
+
+	/* 9. Smaller vec */
+	mem_size = 10000 * page_size;
+	vec_size = 50;
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec)
+		ksft_exit_fail_msg("error nomem\n");
+
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	for (count = 0; count < TEST_ITERATIONS; count++) {
+		total_writes = total_reads = 0;
+		walk_end = (long)mem;
+
+		for (i = 0; i < mem_size; i += page_size) {
+			if (rand() % 2) {
+				mem[i]++;
+				total_writes++;
+			}
+		}
+
+		while (total_reads < total_writes) {
+			ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec,
+					  vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+					  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+			if (ret < 0)
+				ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+			if (ret > vec_size)
+				break;
+
+			reads = get_reads(vec, ret);
+			total_reads += reads;
+		}
+
+		if (total_reads != total_writes)
+			break;
+	}
+
+	ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n");
+
+	free(vec);
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 10. Walk_end tester */
+	vec_size = 1000;
+	mem_size = vec_size * page_size;
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec)
+		ksft_exit_fail_msg("error nomem\n");
+
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	memset(mem, 0, mem_size);
+
+	ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 0 && walk_end == (long)mem,
+			 "Walk_end: Same start and end address\n");
+
+	ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 0 && walk_end == (long)mem,
+			 "Walk_end: Same start and end with WP\n");
+
+	ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 0 && walk_end == (long)mem,
+			 "Walk_end: Same start and end with 0 output buffer\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+			 "Walk_end: Big vec\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+			 "Walk_end: vec of minimum length\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+			 "Walk_end: Max pages specified\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
+			 "Walk_end: Half max pages\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
+			 "Walk_end: 1 max page\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+			 "Walk_end: max pages\n");
+
+	wp_addr_range(mem, mem_size);
+	for (i = 0; i < mem_size; i += 2 * page_size)
+		mem[i]++;
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+			 "Walk_end sparse: Big vec\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+			 "Walk_end sparse: vec of minimum length\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+			 "Walk_end sparse: Max pages specified\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
+			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+			 "Walk_end sparse: Max pages specified\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+			 "Walk_end sparse: Max pages specified\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+			 "Walk_endsparse : Half max pages\n");
+
+	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+			  1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+			 "Walk_end: 1 max page\n");
+
+	free(vec);
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	return 0;
+}
+
+int base_tests(char *prefix, char *mem, int mem_size, int skip)
+{
+	int vec_size, written;
+	struct page_region *vec, *vec2;
+
+	if (skip) {
+		ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix);
+		ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix);
+		ksft_test_result_skip("%s all pages dirty other than first and the last one\n",
+				      prefix);
+		ksft_test_result_skip("%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+		ksft_test_result_skip("%s only middle page dirty\n", prefix);
+		ksft_test_result_skip("%s only two middle pages dirty\n", prefix);
+		return 0;
+	}
+
+	vec_size = mem_size/page_size;
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	vec2 = malloc(sizeof(struct page_region) * vec_size);
+
+	/* 1. all new pages must be not be written (dirty) */
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix);
+
+	/* 2. all pages must be written */
+	memset(mem, -1, mem_size);
+
+	written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+			      PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size,
+			 "%s all pages must be written (dirty)\n", prefix);
+
+	/* 3. all pages dirty other than first and the last one */
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	memset(mem + page_size, 0, mem_size - (2 * page_size));
+
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size,
+			 "%s all pages dirty other than first and the last one\n", prefix);
+
+	written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0,
+				PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 0,
+			 "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+
+	/* 4. only middle page dirty */
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	mem[vec_size/2 * page_size]++;
+
+	written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN,
+				0, 0, PAGE_IS_WRITTEN);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 1 && LEN(vec[0]) >= 1,
+			 "%s only middle page dirty\n", prefix);
+
+	/* 5. only two middle pages dirty and walk over only middle pages */
+	written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	mem[vec_size/2 * page_size]++;
+	mem[(vec_size/2 + 1) * page_size]++;
+
+	written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0,
+				0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size])
+			 && LEN(vec[0]) == 2,
+			 "%s only two middle pages dirty\n", prefix);
+
+	free(vec);
+	free(vec2);
+	return 0;
+}
+
+void *gethugepage(int map_size)
+{
+	int ret;
+	char *map;
+
+	map = memalign(hpage_size, map_size);
+	if (!map)
+		ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno));
+
+	ret = madvise(map, map_size, MADV_HUGEPAGE);
+	if (ret)
+		return NULL;
+
+	memset(map, 0, map_size);
+
+	return map;
+}
+
+int hpage_unit_tests(void)
+{
+	char *map;
+	int ret, ret2;
+	size_t num_pages = 10;
+	int map_size = hpage_size * num_pages;
+	int vec_size = map_size/page_size;
+	struct page_region *vec, *vec2;
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	vec2 = malloc(sizeof(struct page_region) * vec_size);
+	if (!vec || !vec2)
+		ksft_exit_fail_msg("malloc failed\n");
+
+	map = gethugepage(map_size);
+	if (map) {
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+
+		/* 1. all new huge page must not be written (dirty) */
+		ret = pagemap_ioctl(map, map_size, vec, vec_size,
+				    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n",
+				 __func__);
+
+		/* 2. all the huge page must not be written */
+		ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__);
+
+		/* 3. all the huge page must be written and clear dirty as well */
+		memset(map, -1, map_size);
+		ret = pagemap_ioctl(map, map_size, vec, vec_size,
+				    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				    0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map &&
+				 LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN,
+				 "%s all the huge page must be written and clear\n", __func__);
+
+		/* 4. only middle page written */
+		wp_free(map, map_size);
+		free(map);
+		map = gethugepage(map_size);
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+		map[vec_size/2 * page_size]++;
+
+		ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && LEN(vec[0]) > 0,
+				 "%s only middle page written\n", __func__);
+
+		wp_free(map, map_size);
+		free(map);
+	} else {
+		ksft_test_result_skip("%s all new huge page must be written\n", __func__);
+		ksft_test_result_skip("%s all the huge page must not be written\n", __func__);
+		ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__);
+		ksft_test_result_skip("%s only middle page written\n", __func__);
+	}
+
+	/* 5. clear first half of huge page */
+	map = gethugepage(map_size);
+	if (map) {
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+
+		memset(map, 0, map_size);
+
+		wp_addr_range(map, map_size/2);
+
+		ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+				 vec[0].start == (uintptr_t)(map + map_size/2),
+				 "%s clear first half of huge page\n", __func__);
+		wp_free(map, map_size);
+		free(map);
+	} else {
+		ksft_test_result_skip("%s clear first half of huge page\n", __func__);
+	}
+
+	/* 6. clear first half of huge page with limited buffer */
+	map = gethugepage(map_size);
+	if (map) {
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+
+		memset(map, 0, map_size);
+
+		ret = pagemap_ioctl(map, map_size, vec, vec_size,
+				    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				    vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+				 vec[0].start == (uintptr_t)(map + map_size/2),
+				 "%s clear first half of huge page with limited buffer\n",
+				 __func__);
+		wp_free(map, map_size);
+		free(map);
+	} else {
+		ksft_test_result_skip("%s clear first half of huge page with limited buffer\n",
+				      __func__);
+	}
+
+	/* 7. clear second half of huge page */
+	map = gethugepage(map_size);
+	if (map) {
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+
+		memset(map, -1, map_size);
+
+		ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size,
+				    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2,
+				 "%s clear second half huge page\n", __func__);
+		wp_free(map, map_size);
+		free(map);
+	} else {
+		ksft_test_result_skip("%s clear second half huge page\n", __func__);
+	}
+
+	/* 8. get half huge page */
+	map = gethugepage(map_size);
+	if (map) {
+		wp_init(map, map_size);
+		wp_addr_range(map, map_size);
+
+		memset(map, -1, map_size);
+		usleep(100);
+
+		ret = pagemap_ioctl(map, map_size, vec, 1,
+				    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				    hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0,
+				    PAGE_IS_WRITTEN);
+		if (ret < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+		ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size),
+				 "%s get half huge page\n", __func__);
+
+		ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+				    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+		if (ret2 < 0)
+			ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+		ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size,
+				 "%s get half huge page\n", __func__);
+
+		wp_free(map, map_size);
+		free(map);
+	} else {
+		ksft_test_result_skip("%s get half huge page\n", __func__);
+		ksft_test_result_skip("%s get half huge page\n", __func__);
+	}
+
+	free(vec);
+	free(vec2);
+	return 0;
+}
+
+int unmapped_region_tests(void)
+{
+	void *start = (void *)0x10000000;
+	int written, len = 0x00040000;
+	int vec_size = len / page_size;
+	struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
+
+	/* 1. Get written pages */
+	written = pagemap_ioctl(start, len, vec, vec_size, 0, 0,
+				PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS);
+	if (written < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+	ksft_test_result(written >= 0, "%s Get status of pages\n", __func__);
+
+	free(vec);
+	return 0;
+}
+
+static void test_simple(void)
+{
+	int i;
+	char *map;
+	struct page_region vec;
+
+	map = aligned_alloc(page_size, page_size);
+	if (!map)
+		ksft_exit_fail_msg("aligned_alloc failed\n");
+
+	wp_init(map, page_size);
+	wp_addr_range(map, page_size);
+
+	for (i = 0 ; i < TEST_ITERATIONS; i++) {
+		if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+				  PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) {
+			ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i);
+			break;
+		}
+
+		wp_addr_range(map, page_size);
+		/* Write something to the page to get the written bit enabled on the page */
+		map[0]++;
+
+		if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+				  PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) {
+			ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i);
+			break;
+		}
+
+		wp_addr_range(map, page_size);
+	}
+	wp_free(map, page_size);
+	free(map);
+
+	ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+int sanity_tests(void)
+{
+	int mem_size, vec_size, ret, fd, i, buf_size;
+	struct page_region *vec;
+	char *mem, *fmem;
+	struct stat sbuf;
+	char *tmp_buf;
+
+	/* 1. wrong operation */
+	mem_size = 10 * page_size;
+	vec_size = mem_size / page_size;
+
+	vec = malloc(sizeof(struct page_region) * vec_size);
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED || vec == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+				       PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+				       0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+			 "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__);
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+				       PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+			 "%s required_mask specified\n", __func__);
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+				       0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0,
+			 "%s anyof_mask specified\n", __func__);
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+				       0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0,
+			 "%s excluded_mask specified\n", __func__);
+	ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+				       PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0,
+				       PAGEMAP_BITS_ALL) >= 0,
+			 "%s required_mask and anyof_mask specified\n", __func__);
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 2. Get sd and present pages with anyof_mask */
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	memset(mem, 0, mem_size);
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+			    0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL);
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+			 (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+			 (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+			 "%s Get sd and present pages with anyof_mask\n", __func__);
+
+	/* 3. Get sd and present pages with required_mask */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+			    PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL);
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+			 (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+			 (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+			 "%s Get all the pages with required_mask\n", __func__);
+
+	/* 4. Get sd and present pages with required_mask and anyof_mask */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+			    PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL);
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+			 (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+			 (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+			 "%s Get sd and present pages with required_mask and anyof_mask\n",
+			 __func__);
+
+	/* 5. Don't get sd pages */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+			    PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL);
+	ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__);
+
+	/* 6. Don't get present pages */
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+			    PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL);
+	ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__);
+
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 8. Find written present pages with return mask */
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	memset(mem, 0, mem_size);
+
+	ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+			    0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN);
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+			 vec[0].categories == PAGE_IS_WRITTEN,
+			 "%s Find written present pages with return mask\n", __func__);
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 9. Memory mapped file */
+	fd = open(__FILE__, O_RDONLY);
+	if (fd < 0)
+		ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
+
+	ret = stat(__FILE__, &sbuf);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (fmem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+	tmp_buf = malloc(sbuf.st_size);
+	memcpy(tmp_buf, fmem, sbuf.st_size);
+
+	ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0,
+			    0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS);
+
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+			 LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) &&
+			 (vec[0].categories & PAGE_IS_FILE),
+			 "%s Memory mapped file\n", __func__);
+
+	munmap(fmem, sbuf.st_size);
+	close(fd);
+
+	/* 10. Create and read/write to a memory mapped file */
+	buf_size = page_size * 10;
+
+	fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666);
+	if (fd < 0)
+		ksft_exit_fail_msg("Read/write to memory: %s\n",
+				   strerror(errno));
+
+	for (i = 0; i < buf_size; i++)
+		if (write(fd, "c", 1) < 0)
+			ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+	fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (fmem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+	wp_init(fmem, buf_size);
+	wp_addr_range(fmem, buf_size);
+
+	for (i = 0; i < buf_size; i++)
+		fmem[i] = 'z';
+
+	msync(fmem, buf_size, MS_SYNC);
+
+	ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0,
+			    PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0,
+			    PAGEMAP_BITS_ALL);
+
+	ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+			 LEN(vec[0]) == (buf_size/page_size) &&
+			 (vec[0].categories & PAGE_IS_WRITTEN),
+			 "%s Read/write to memory\n", __func__);
+
+	wp_free(fmem, buf_size);
+	munmap(fmem, buf_size);
+	close(fd);
+
+	free(vec);
+	return 0;
+}
+
+int mprotect_tests(void)
+{
+	int ret;
+	char *mem, *mem2;
+	struct page_region vec;
+	int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+
+	if (pagemap_fd < 0) {
+		fprintf(stderr, "open() failed\n");
+		exit(1);
+	}
+
+	/* 1. Map two pages */
+	mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem, 2 * page_size);
+	wp_addr_range(mem, 2 * page_size);
+
+	/* Populate both pages. */
+	memset(mem, 1, 2 * page_size);
+
+	ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+			    0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__);
+
+	/* 2. Start tracking */
+	wp_addr_range(mem, 2 * page_size);
+
+	ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0,
+				       PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0,
+			 "%s Both pages are not written (dirty)\n", __func__);
+
+	/* 3. Remap the second page */
+	mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE,
+		    MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
+	if (mem2 == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem2, page_size);
+	wp_addr_range(mem2, page_size);
+
+	/* Protect + unprotect. */
+	mprotect(mem, page_size, PROT_NONE);
+	mprotect(mem, 2 * page_size, PROT_READ);
+	mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE);
+
+	/* Modify both pages. */
+	memset(mem, 2, 2 * page_size);
+
+	/* Protect + unprotect. */
+	mprotect(mem, page_size, PROT_NONE);
+	mprotect(mem, page_size, PROT_READ);
+	mprotect(mem, page_size, PROT_READ|PROT_WRITE);
+
+	ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+			    0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ksft_test_result(ret == 1 && LEN(vec) == 2,
+			 "%s Both pages written after remap and mprotect\n", __func__);
+
+	/* 4. Clear and make the pages written */
+	wp_addr_range(mem, 2 * page_size);
+
+	memset(mem, 'A', 2 * page_size);
+
+	ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+			    0, 0, PAGE_IS_WRITTEN);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	ksft_test_result(ret == 1 && LEN(vec) == 2,
+			 "%s Clear and make the pages written\n", __func__);
+
+	wp_free(mem, 2 * page_size);
+	munmap(mem, 2 * page_size);
+	return 0;
+}
+
+/* transact test */
+static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8;
+static pthread_barrier_t start_barrier, end_barrier;
+static unsigned int extra_thread_faults;
+static unsigned int iter_count = 1000;
+static volatile int finish;
+
+static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
+				     int reset, int page_size)
+{
+	struct pm_scan_arg arg = {0};
+	struct page_region rgns[256];
+	int i, j, cnt, ret;
+
+	arg.size = sizeof(struct pm_scan_arg);
+	arg.start = (uintptr_t)mem;
+	arg.max_pages = count;
+	arg.end = (uintptr_t)(mem + count * page_size);
+	arg.vec = (uintptr_t)rgns;
+	arg.vec_len = sizeof(rgns) / sizeof(*rgns);
+	if (reset)
+		arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC;
+	arg.category_mask = PAGE_IS_WRITTEN;
+	arg.return_mask = PAGE_IS_WRITTEN;
+
+	ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+	if (ret < 0)
+		ksft_exit_fail_msg("ioctl failed\n");
+
+	cnt = 0;
+	for (i = 0; i < ret; ++i) {
+		if (rgns[i].categories != PAGE_IS_WRITTEN)
+			ksft_exit_fail_msg("wrong flags\n");
+
+		for (j = 0; j < LEN(rgns[i]); ++j)
+			cnt++;
+	}
+
+	return cnt;
+}
+
+void *thread_proc(void *mem)
+{
+	int *m = mem;
+	long curr_faults, faults;
+	struct rusage r;
+	unsigned int i;
+	int ret;
+
+	if (getrusage(RUSAGE_THREAD, &r))
+		ksft_exit_fail_msg("getrusage\n");
+
+	curr_faults = r.ru_minflt;
+
+	while (!finish) {
+		ret = pthread_barrier_wait(&start_barrier);
+		if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+			ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+		for (i = 0; i < access_per_thread; ++i)
+			__atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST);
+
+		ret = pthread_barrier_wait(&end_barrier);
+		if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+			ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+		if (getrusage(RUSAGE_THREAD, &r))
+			ksft_exit_fail_msg("getrusage\n");
+
+		faults = r.ru_minflt - curr_faults;
+		if (faults < access_per_thread)
+			ksft_exit_fail_msg("faults < access_per_thread");
+
+		__atomic_add_fetch(&extra_thread_faults, faults - access_per_thread,
+				   __ATOMIC_SEQ_CST);
+		curr_faults = r.ru_minflt;
+	}
+
+	return NULL;
+}
+
+static void transact_test(int page_size)
+{
+	unsigned int i, count, extra_pages;
+	pthread_t th;
+	char *mem;
+	int ret, c;
+
+	if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
+		ksft_exit_fail_msg("pthread_barrier_init\n");
+
+	if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1))
+		ksft_exit_fail_msg("pthread_barrier_init\n");
+
+	mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno));
+
+	wp_init(mem, 0x1000 * nthreads * pages_per_thread);
+	wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread);
+
+	memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
+
+	count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+	ksft_test_result(count > 0, "%s count %d\n", __func__, count);
+	count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+	ksft_test_result(count == 0, "%s count %d\n", __func__, count);
+
+	finish = 0;
+	for (i = 0; i < nthreads; ++i)
+		pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread);
+
+	extra_pages = 0;
+	for (i = 0; i < iter_count; ++i) {
+		count = 0;
+
+		ret = pthread_barrier_wait(&start_barrier);
+		if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+			ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+		count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1,
+					      page_size);
+
+		ret = pthread_barrier_wait(&end_barrier);
+		if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+			ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+		if (count > nthreads * access_per_thread)
+			ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n",
+					   count, nthreads * access_per_thread, i);
+
+		c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+		count += c;
+
+		if (c > nthreads * access_per_thread) {
+			ksft_test_result_fail(" %s count > nthreads\n", __func__);
+			return;
+		}
+
+		if (count != nthreads * access_per_thread) {
+			/*
+			 * The purpose of the test is to make sure that no page updates are lost
+			 * when the page updates and read-resetting soft dirty flags are performed
+			 * in parallel. However, it is possible that the application will get the
+			 * soft dirty flags twice on the two consecutive read-resets. This seems
+			 * unavoidable as soft dirty flag is handled in software through page faults
+			 * in kernel. While the updating the flags is supposed to be synchronized
+			 * between page fault handling and read-reset, it is possible that
+			 * read-reset happens after page fault PTE update but before the application
+			 * re-executes write instruction. So read-reset gets the flag, clears write
+			 * access and application gets page fault again for the same write.
+			 */
+			if (count < nthreads * access_per_thread) {
+				ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count,
+						      nthreads * access_per_thread);
+				return;
+			}
+
+			extra_pages += count - nthreads * access_per_thread;
+		}
+	}
+
+	pthread_barrier_wait(&start_barrier);
+	finish = 1;
+	pthread_barrier_wait(&end_barrier);
+
+	ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__,
+			      extra_pages,
+			      100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
+			      extra_thread_faults);
+}
+
+int main(void)
+{
+	int mem_size, shmid, buf_size, fd, i, ret;
+	char *mem, *map, *fmem;
+	struct stat sbuf;
+
+	ksft_print_header();
+
+	if (init_uffd())
+		return ksft_exit_pass();
+
+	ksft_set_plan(115);
+
+	page_size = getpagesize();
+	hpage_size = read_pmd_pagesize();
+
+	pagemap_fd = open(PAGEMAP, O_RDONLY);
+	if (pagemap_fd < 0)
+		return -EINVAL;
+
+	/* 1. Sanity testing */
+	sanity_tests_sd();
+
+	/* 2. Normal page testing */
+	mem_size = 10 * page_size;
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	base_tests("Page testing:", mem, mem_size, 0);
+
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 3. Large page testing */
+	mem_size = 512 * 10 * page_size;
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem\n");
+	wp_init(mem, mem_size);
+	wp_addr_range(mem, mem_size);
+
+	base_tests("Large Page testing:", mem, mem_size, 0);
+
+	wp_free(mem, mem_size);
+	munmap(mem, mem_size);
+
+	/* 4. Huge page testing */
+	map = gethugepage(hpage_size);
+	if (map) {
+		wp_init(map, hpage_size);
+		wp_addr_range(map, hpage_size);
+		base_tests("Huge page testing:", map, hpage_size, 0);
+		wp_free(map, hpage_size);
+		free(map);
+	} else {
+		base_tests("Huge page testing:", NULL, 0, 1);
+	}
+
+	/* 5. SHM Hugetlb page testing */
+	mem_size = 2*1024*1024;
+	mem = gethugetlb_mem(mem_size, &shmid);
+	if (mem) {
+		wp_init(mem, mem_size);
+		wp_addr_range(mem, mem_size);
+
+		base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+		wp_free(mem, mem_size);
+		shmctl(shmid, IPC_RMID, NULL);
+	} else {
+		base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+	}
+
+	/* 6. Hugetlb page testing */
+	mem = gethugetlb_mem(mem_size, NULL);
+	if (mem) {
+		wp_init(mem, mem_size);
+		wp_addr_range(mem, mem_size);
+
+		base_tests("Hugetlb mem testing:", mem, mem_size, 0);
+
+		wp_free(mem, mem_size);
+	} else {
+		base_tests("Hugetlb mem testing:", NULL, 0, 1);
+	}
+
+	/* 7. File Hugetlb testing */
+	mem_size = 2*1024*1024;
+	fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
+	mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (mem) {
+		wp_init(mem, mem_size);
+		wp_addr_range(mem, mem_size);
+
+		base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+		wp_free(mem, mem_size);
+		shmctl(shmid, IPC_RMID, NULL);
+	} else {
+		base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+	}
+	close(fd);
+
+	/* 8. File memory testing */
+	buf_size = page_size * 10;
+
+	fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777);
+	if (fd < 0)
+		ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+				   strerror(errno));
+
+	for (i = 0; i < buf_size; i++)
+		if (write(fd, "c", 1) < 0)
+			ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+	ret = stat(__FILE__".tmp0", &sbuf);
+	if (ret < 0)
+		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+	fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (fmem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+	wp_init(fmem, sbuf.st_size);
+	wp_addr_range(fmem, sbuf.st_size);
+
+	base_tests("File memory testing:", fmem, sbuf.st_size, 0);
+
+	wp_free(fmem, sbuf.st_size);
+	munmap(fmem, sbuf.st_size);
+	close(fd);
+
+	/* 9. File memory testing */
+	buf_size = page_size * 10;
+
+	fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL);
+	if (fd < 0)
+		ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+				   strerror(errno));
+
+	if (ftruncate(fd, buf_size))
+		ksft_exit_fail_msg("Error ftruncate\n");
+
+	for (i = 0; i < buf_size; i++)
+		if (write(fd, "c", 1) < 0)
+			ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+	fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (fmem == MAP_FAILED)
+		ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+	wp_init(fmem, buf_size);
+	wp_addr_range(fmem, buf_size);
+
+	base_tests("File anonymous memory testing:", fmem, buf_size, 0);
+
+	wp_free(fmem, buf_size);
+	munmap(fmem, buf_size);
+	close(fd);
+
+	/* 10. Huge page tests */
+	hpage_unit_tests();
+
+	/* 11. Iterative test */
+	test_simple();
+
+	/* 12. Mprotect test */
+	mprotect_tests();
+
+	/* 13. Transact test */
+	transact_test(page_size);
+
+	/* 14. Sanity testing */
+	sanity_tests();
+
+	/*15. Unmapped address test */
+	unmapped_region_tests();
+
+	/* 16. Userfaultfd tests */
+	userfaultfd_tests();
+
+	close(pagemap_fd);
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 92f3be3dd8e5..1af3156a9db8 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -34,7 +34,7 @@ extern int test_nr;
 extern int iteration_nr;
 
 #ifdef __GNUC__
-__attribute__((format(printf, 1, 2)))
+__printf(1, 2)
 #endif
 static inline void sigsafe_printf(const char *format, ...)
 {
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 3e2bc818d566..00757445278e 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -56,6 +56,8 @@ separated by spaces:
 	memory protection key tests
 - soft_dirty
 	test soft dirty page bit semantics
+- pagemap
+	test pagemap_scan IOCTL
 - cow
 	test copy-on-write semantics
 - thp
@@ -221,6 +223,13 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
 CATEGORY="hugetlb" run_test ./hugepage-vmemmap
 CATEGORY="hugetlb" run_test ./hugetlb-madvise
 
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+# For this test, we need one and just one huge page
+echo 1 > /proc/sys/vm/nr_hugepages
+CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+
 if test_selected "hugetlb"; then
 	echo "NOTE: These hugetlb tests provide minimal coverage.  Use"
 	echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
@@ -303,6 +312,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
 # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
 CATEGORY="madv_populate" run_test ./madv_populate
 
+echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
 CATEGORY="memfd_secret" run_test ./memfd_secret
 
 # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
@@ -342,6 +352,8 @@ then
 	CATEGORY="soft_dirty" run_test ./soft-dirty
 fi
 
+CATEGORY="pagemap" run_test ./pagemap_ioctl
+
 # COW tests
 CATEGORY="cow" run_test ./cow
 
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 558c9cd8901c..3082b40492dd 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -269,3 +269,22 @@ int uffd_unregister(int uffd, void *addr, uint64_t len)
 
 	return ret;
 }
+
+unsigned long get_free_hugepages(void)
+{
+	unsigned long fhp = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return fhp;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "HugePages_Free:      %lu", &fhp) == 1)
+			break;
+	}
+
+	free(line);
+	fclose(f);
+	return fhp;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c7fa61f0dff8..c02990bbd56f 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -51,6 +51,7 @@ int uffd_register(int uffd, void *addr, uint64_t len,
 int uffd_unregister(int uffd, void *addr, uint64_t len);
 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
 			      bool miss, bool wp, bool minor, uint64_t *ioctls);
+unsigned long get_free_hugepages(void);
 
 /*
  * On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 61939a695f95..9274edfb76ff 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -34,6 +34,7 @@ TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
 TEST_PROGS += cmsg_so_mark.sh
 TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
+TEST_PROGS += netns-name.sh
 TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -90,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh
 TEST_PROGS += test_vxlan_nolocalbypass.sh
 TEST_PROGS += test_bridge_backup_port.sh
 TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
 
 TEST_FILES := settings
 
@@ -99,6 +101,7 @@ $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
 $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
 
 # Rules to generate bpf obj nat6to4.o
 CLANG ?= clang
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..8d7575389f58 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
 	const char *host;
 	const char *service;
 	unsigned int size;
+	unsigned int num_pkt;
 	struct {
 		unsigned int mark;
 		unsigned int dontfrag;
 		unsigned int tclass;
 		unsigned int hlimit;
+		unsigned int priority;
 	} sockopt;
 	struct {
 		unsigned int family;
@@ -72,6 +74,7 @@ struct options {
 	} v6;
 } opt = {
 	.size = 13,
+	.num_pkt = 1,
 	.sock = {
 		.family	= AF_UNSPEC,
 		.type	= SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	int o;
 
-	while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
 				cs_usage(argv[0]);
 			}
 			break;
-
+		case 'P':
+			opt.sockopt.priority = atoi(optarg);
+			break;
 		case 'm':
 			opt.mark.ena = true;
 			opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
 		case 'M':
 			opt.sockopt.mark = atoi(optarg);
 			break;
+		case 'n':
+			opt.num_pkt = atoi(optarg);
+			break;
 		case 'd':
 			opt.txtime.ena = true;
 			opt.txtime.delay = atoi(optarg);
@@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd)
 	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
 		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
 		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+	if (opt.sockopt.priority &&
+	    setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+		       &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+		error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
 }
 
 int main(int argc, char *argv[])
@@ -421,6 +433,7 @@ int main(int argc, char *argv[])
 	char *buf;
 	int err;
 	int fd;
+	int i;
 
 	cs_parse_args(argc, argv);
 
@@ -480,24 +493,27 @@ int main(int argc, char *argv[])
 
 	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
 
-	err = sendmsg(fd, &msg, 0);
-	if (err < 0) {
-		if (!opt.silent_send)
-			fprintf(stderr, "send failed: %s\n", strerror(errno));
-		err = ERN_SEND;
-		goto err_out;
-	} else if (err != (int)opt.size) {
-		fprintf(stderr, "short send\n");
-		err = ERN_SEND_SHORT;
-		goto err_out;
-	} else {
-		err = ERN_SUCCESS;
+	for (i = 0; i < opt.num_pkt; i++) {
+		err = sendmsg(fd, &msg, 0);
+		if (err < 0) {
+			if (!opt.silent_send)
+				fprintf(stderr, "send failed: %s\n", strerror(errno));
+			err = ERN_SEND;
+			goto err_out;
+		} else if (err != (int)opt.size) {
+			fprintf(stderr, "short send\n");
+			err = ERN_SEND_SHORT;
+			goto err_out;
+		}
 	}
+	err = ERN_SUCCESS;
 
-	/* Make sure all timestamps have time to loop back */
-	usleep(opt.txtime.delay);
+	if (opt.ts.ena) {
+		/* Make sure all timestamps have time to loop back */
+		usleep(opt.txtime.delay);
 
-	cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+		cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+	}
 
 err_out:
 	close(fd);
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index e7d2a530618a..66d0db7a2614 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -2437,6 +2437,9 @@ ipv4_mpath_list_test()
 	run_cmd "ip -n ns2 route add 203.0.113.0/24
 		nexthop via 172.16.201.2 nexthop via 172.16.202.2"
 	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
+	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
+	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
 	set +e
 
 	local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2449,7 +2452,7 @@ ipv4_mpath_list_test()
 	# words, the FIB lookup tracepoint needs to be triggered for every
 	# packet.
 	local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
-	run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+	run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
 	local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
 	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
 	list_rcv_eval $tmp_file $diff
@@ -2494,7 +2497,7 @@ ipv6_mpath_list_test()
 	# words, the FIB lookup tracepoint needs to be triggered for every
 	# packet.
 	local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
-	run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+	run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
 	local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
 	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
 	list_rcv_eval $tmp_file $diff
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d0c6c499d5da..e4e3e9405056 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -145,14 +145,14 @@ cfg_test_host_common()
 
 	# Check basic add, replace and delete behavior.
 	bridge mdb add dev br0 port br0 grp $grp $state vid 10
-	bridge mdb show dev br0 vid 10 | grep -q "$grp"
+	bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
 	check_err $? "Failed to add $name host entry"
 
 	bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
 	check_fail $? "Managed to replace $name host entry"
 
 	bridge mdb del dev br0 port br0 grp $grp $state vid 10
-	bridge mdb show dev br0 vid 10 | grep -q "$grp"
+	bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
 	check_fail $? "Failed to delete $name host entry"
 
 	# Check error cases.
@@ -200,7 +200,7 @@ cfg_test_port_common()
 
 	# Check basic add, replace and delete behavior.
 	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
-	bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
 	check_err $? "Failed to add $name entry"
 
 	bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
@@ -208,31 +208,31 @@ cfg_test_port_common()
 	check_err $? "Failed to replace $name entry"
 
 	bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
-	bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
 	check_fail $? "Failed to delete $name entry"
 
 	# Check default protocol and replacement.
 	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static"
 	check_err $? "$name entry not added with default \"static\" protocol"
 
 	bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
 		proto 123
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123"
 	check_err $? "Failed to replace protocol of $name entry"
 	bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
 
 	# Check behavior when VLAN is not specified.
 	bridge mdb add dev br0 port $swp1 $grp_key permanent
-	bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
 	check_err $? "$name entry with VLAN 10 not added when VLAN was not specified"
-	bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
 	check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"
 
 	bridge mdb del dev br0 port $swp1 $grp_key permanent
-	bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
 	check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
-	bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+	bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
 	check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified"
 
 	# Check behavior when bridge port is down.
@@ -298,21 +298,21 @@ __cfg_test_port_ip_star_g()
 	RET=0
 
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
 	check_err $? "Default filter mode is not \"exclude\""
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
 
 	# Check basic add and delete behavior.
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
 		source_list $src1
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+	bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
 	check_err $? "(*, G) entry not created"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
 	check_err $? "(S, G) entry not created"
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+	bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
 	check_fail $? "(*, G) entry not deleted"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
 	check_fail $? "(S, G) entry not deleted"
 
 	## State (permanent / temp) tests.
@@ -321,18 +321,15 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
 		filter_mode exclude source_list $src1
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "permanent"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
 	check_err $? "(*, G) entry not added as \"permanent\" when should"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \
 		grep -q "permanent"
 	check_err $? "(S, G) entry not added as \"permanent\" when should"
 
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
 	check_err $? "(*, G) \"permanent\" entry has a pending group timer"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "\/0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
 	check_err $? "\"permanent\" source entry has a pending source timer"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -342,18 +339,14 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
 	check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
 	check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should"
 
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
 	check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "\/0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
 	check_err $? "\"blocked\" source entry has a pending source timer"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,18 +356,14 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode include source_list $src1
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
 	check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
 	check_err $? "(S, G) entry not added as \"temp\" when should"
 
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
 	check_err $? "(*, G) INCLUDE entry has a pending group timer"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "\/0.00"
+	bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
 	check_fail $? "Source entry does not have a pending source timer"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -383,8 +372,7 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode include source_list $src1
 
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00"
 	check_err $? "(S, G) entry has a pending group timer"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -396,11 +384,9 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
 		filter_mode include source_list $src1
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "include"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
 	check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "blocked"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
 	check_fail $? "(S, G) entry marked as \"blocked\" when should not"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -410,11 +396,9 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
 		filter_mode exclude source_list $src1
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "exclude"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
 	check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "blocked"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
 	check_err $? "(S, G) entry not marked as \"blocked\" when should"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -426,11 +410,9 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
 		filter_mode exclude source_list $src1 proto zebra
 
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "zebra"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra"
 	check_err $? "(*, G) entry not added with \"zebra\" protocol"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "zebra"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra"
 	check_err $? "(S, G) entry not marked added with \"zebra\" protocol"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -443,20 +425,16 @@ __cfg_test_port_ip_star_g()
 
 	bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
 		filter_mode exclude source_list $src1
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "permanent"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
 	check_err $? "(*, G) entry not marked as \"permanent\" after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "permanent"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent"
 	check_err $? "(S, G) entry not marked as \"permanent\" after replace"
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
 	check_err $? "(*, G) entry not marked as \"temp\" after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
 	check_err $? "(S, G) entry not marked as \"temp\" after replace"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -467,20 +445,16 @@ __cfg_test_port_ip_star_g()
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode include source_list $src1
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "include"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
 	check_err $? "(*, G) not marked with \"include\" filter mode after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "blocked"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
 	check_fail $? "(S, G) marked as \"blocked\" after replace"
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "exclude"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
 	check_err $? "(*, G) not marked with \"exclude\" filter mode after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "blocked"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
 	check_err $? "(S, G) not marked as \"blocked\" after replace"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -491,20 +465,20 @@ __cfg_test_port_ip_star_g()
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1,$src2,$src3
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
 	check_err $? "(S, G) entry for source $src1 not created after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+	bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
 	check_err $? "(S, G) entry for source $src2 not created after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+	bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
 	check_err $? "(S, G) entry for source $src3 not created after replace"
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1,$src3
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
 	check_err $? "(S, G) entry for source $src1 not created after second replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+	bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
 	check_fail $? "(S, G) entry for source $src2 created after second replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+	bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
 	check_err $? "(S, G) entry for source $src3 not created after second replace"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -515,11 +489,9 @@ __cfg_test_port_ip_star_g()
 
 	bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
 		filter_mode exclude source_list $src1 proto bgp
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
-		grep -q "bgp"
+	bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp"
 	check_err $? "(*, G) protocol not changed to \"bgp\" after replace"
-	bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
-		grep -q "bgp"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp"
 	check_err $? "(S, G) protocol not changed to \"bgp\" after replace"
 
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -532,8 +504,8 @@ __cfg_test_port_ip_star_g()
 	bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
 		filter_mode include source_list $src1
 	bridge mdb add dev br0 port $swp1 grp $grp vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \
-		grep "$src1" | grep -q "added_by_star_ex"
+	bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \
+		grep -q "added_by_star_ex"
 	check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry"
 	bridge mdb del dev br0 port $swp1 grp $grp vid 10
 	bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10
@@ -606,27 +578,23 @@ __cfg_test_port_ip_sg()
 	RET=0
 
 	bridge mdb add dev br0 port $swp1 $grp_key vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include"
 	check_err $? "Default filter mode is not \"include\""
 	bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
 	# Check that entries can be added as both permanent and temp and that
 	# group timer is set correctly.
 	bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q "permanent"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
 	check_err $? "Entry not added as \"permanent\" when should"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
 	check_err $? "\"permanent\" entry has a pending group timer"
 	bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
 	bridge mdb add dev br0 port $swp1 $grp_key temp vid 10
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
 	check_err $? "Entry not added as \"temp\" when should"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
 	check_fail $? "\"temp\" entry has an unpending group timer"
 	bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -650,24 +618,19 @@ __cfg_test_port_ip_sg()
 	# Check that we can replace available attributes.
 	bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123
 	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q "111"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111"
 	check_err $? "Failed to replace protocol"
 
 	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q "permanent"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
 	check_err $? "Entry not marked as \"permanent\" after replace"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
 	check_err $? "Entry has a pending group timer after replace"
 
 	bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
-	bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q "temp"
+	bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
 	check_err $? "Entry not marked as \"temp\" after replace"
-	bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-		grep -q " 0.00"
+	bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
 	check_fail $? "Entry has an unpending group timer after replace"
 	bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -675,7 +638,7 @@ __cfg_test_port_ip_sg()
 	# (*, G) ports need to be added to it.
 	bridge mdb add dev br0 port $swp2 grp $grp vid 10
 	bridge mdb add dev br0 port $swp1 $grp_key vid 10
-	bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \
+	bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \
 		grep -q "added_by_star_ex"
 	check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry"
 	bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -1132,7 +1095,7 @@ ctrl_igmpv3_is_in_test()
 	$MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
 		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
-	bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
+	bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
 	check_fail $? "Permanent entry affected by IGMP packet"
 
 	# Replace the permanent entry with a temporary one and check that after
@@ -1145,12 +1108,10 @@ ctrl_igmpv3_is_in_test()
 	$MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
 		-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
-	bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
-		grep -q 192.0.2.2
+	bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2
 	check_err $? "Source not add to source list"
 
-	bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \
-		grep -q "src 192.0.2.2"
+	bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
 	check_err $? "(S, G) entry not created for new source"
 
 	bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
@@ -1172,8 +1133,7 @@ ctrl_mldv2_is_in_test()
 	$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
 		-t ip hop=1,next=0,p="$p" -q
 
-	bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
-		grep -q 2001:db8:1::2
+	bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
 	check_fail $? "Permanent entry affected by MLD packet"
 
 	# Replace the permanent entry with a temporary one and check that after
@@ -1186,12 +1146,10 @@ ctrl_mldv2_is_in_test()
 	$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
 		-t ip hop=1,next=0,p="$p" -q
 
-	bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
-		grep -q 2001:db8:1::2
+	bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2
 	check_err $? "Source not add to source list"
 
-	bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
-		grep -q "src 2001:db8:1::2"
+	bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
 	check_err $? "(S, G) entry not created for new source"
 
 	bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
@@ -1208,8 +1166,8 @@ ctrl_test()
 	ctrl_mldv2_is_in_test
 }
 
-if ! bridge mdb help 2>&1 | grep -q "replace"; then
-	echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+if ! bridge mdb help 2>&1 | grep -q "get"; then
+	echo "SKIP: iproute2 too old, missing bridge mdb get support"
 	exit $ksft_skip
 fi
 
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..24b77bdf41ff
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that  0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a 100ms delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+	echo "$1"
+	exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+	./in_netns.sh "$0" __subprocess
+	exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for the delay period (100ms) and see that
+# twenty are now sent.
+sleep 0.1
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10'  || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30'  || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40'  || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 154287740172..76e604e4810e 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -36,6 +36,8 @@
 #include <sys/un.h>
 #include <sys/wait.h>
 
+#include <io_uring/mini_liburing.h>
+
 #define NOTIF_TAG 0xfffffffULL
 #define NONZC_TAG 0
 #define ZC_TAG 1
@@ -60,272 +62,6 @@ static struct sockaddr_storage cfg_dst_addr;
 
 static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
 
-struct io_sq_ring {
-	unsigned *head;
-	unsigned *tail;
-	unsigned *ring_mask;
-	unsigned *ring_entries;
-	unsigned *flags;
-	unsigned *array;
-};
-
-struct io_cq_ring {
-	unsigned *head;
-	unsigned *tail;
-	unsigned *ring_mask;
-	unsigned *ring_entries;
-	struct io_uring_cqe *cqes;
-};
-
-struct io_uring_sq {
-	unsigned *khead;
-	unsigned *ktail;
-	unsigned *kring_mask;
-	unsigned *kring_entries;
-	unsigned *kflags;
-	unsigned *kdropped;
-	unsigned *array;
-	struct io_uring_sqe *sqes;
-
-	unsigned sqe_head;
-	unsigned sqe_tail;
-
-	size_t ring_sz;
-};
-
-struct io_uring_cq {
-	unsigned *khead;
-	unsigned *ktail;
-	unsigned *kring_mask;
-	unsigned *kring_entries;
-	unsigned *koverflow;
-	struct io_uring_cqe *cqes;
-
-	size_t ring_sz;
-};
-
-struct io_uring {
-	struct io_uring_sq sq;
-	struct io_uring_cq cq;
-	int ring_fd;
-};
-
-#ifdef __alpha__
-# ifndef __NR_io_uring_setup
-#  define __NR_io_uring_setup		535
-# endif
-# ifndef __NR_io_uring_enter
-#  define __NR_io_uring_enter		536
-# endif
-# ifndef __NR_io_uring_register
-#  define __NR_io_uring_register	537
-# endif
-#else /* !__alpha__ */
-# ifndef __NR_io_uring_setup
-#  define __NR_io_uring_setup		425
-# endif
-# ifndef __NR_io_uring_enter
-#  define __NR_io_uring_enter		426
-# endif
-# ifndef __NR_io_uring_register
-#  define __NR_io_uring_register	427
-# endif
-#endif
-
-#if defined(__x86_64) || defined(__i386__)
-#define read_barrier()	__asm__ __volatile__("":::"memory")
-#define write_barrier()	__asm__ __volatile__("":::"memory")
-#else
-
-#define read_barrier()	__sync_synchronize()
-#define write_barrier()	__sync_synchronize()
-#endif
-
-static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
-{
-	return syscall(__NR_io_uring_setup, entries, p);
-}
-
-static int io_uring_enter(int fd, unsigned int to_submit,
-			  unsigned int min_complete,
-			  unsigned int flags, sigset_t *sig)
-{
-	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
-			flags, sig, _NSIG / 8);
-}
-
-static int io_uring_register_buffers(struct io_uring *ring,
-				     const struct iovec *iovecs,
-				     unsigned nr_iovecs)
-{
-	int ret;
-
-	ret = syscall(__NR_io_uring_register, ring->ring_fd,
-		      IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
-	return (ret < 0) ? -errno : ret;
-}
-
-static int io_uring_mmap(int fd, struct io_uring_params *p,
-			 struct io_uring_sq *sq, struct io_uring_cq *cq)
-{
-	size_t size;
-	void *ptr;
-	int ret;
-
-	sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
-	ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
-		   MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
-	if (ptr == MAP_FAILED)
-		return -errno;
-	sq->khead = ptr + p->sq_off.head;
-	sq->ktail = ptr + p->sq_off.tail;
-	sq->kring_mask = ptr + p->sq_off.ring_mask;
-	sq->kring_entries = ptr + p->sq_off.ring_entries;
-	sq->kflags = ptr + p->sq_off.flags;
-	sq->kdropped = ptr + p->sq_off.dropped;
-	sq->array = ptr + p->sq_off.array;
-
-	size = p->sq_entries * sizeof(struct io_uring_sqe);
-	sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
-			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
-	if (sq->sqes == MAP_FAILED) {
-		ret = -errno;
-err:
-		munmap(sq->khead, sq->ring_sz);
-		return ret;
-	}
-
-	cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
-	ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
-			MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
-	if (ptr == MAP_FAILED) {
-		ret = -errno;
-		munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
-		goto err;
-	}
-	cq->khead = ptr + p->cq_off.head;
-	cq->ktail = ptr + p->cq_off.tail;
-	cq->kring_mask = ptr + p->cq_off.ring_mask;
-	cq->kring_entries = ptr + p->cq_off.ring_entries;
-	cq->koverflow = ptr + p->cq_off.overflow;
-	cq->cqes = ptr + p->cq_off.cqes;
-	return 0;
-}
-
-static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
-			       unsigned flags)
-{
-	struct io_uring_params p;
-	int fd, ret;
-
-	memset(ring, 0, sizeof(*ring));
-	memset(&p, 0, sizeof(p));
-	p.flags = flags;
-
-	fd = io_uring_setup(entries, &p);
-	if (fd < 0)
-		return fd;
-	ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
-	if (!ret)
-		ring->ring_fd = fd;
-	else
-		close(fd);
-	return ret;
-}
-
-static int io_uring_submit(struct io_uring *ring)
-{
-	struct io_uring_sq *sq = &ring->sq;
-	const unsigned mask = *sq->kring_mask;
-	unsigned ktail, submitted, to_submit;
-	int ret;
-
-	read_barrier();
-	if (*sq->khead != *sq->ktail) {
-		submitted = *sq->kring_entries;
-		goto submit;
-	}
-	if (sq->sqe_head == sq->sqe_tail)
-		return 0;
-
-	ktail = *sq->ktail;
-	to_submit = sq->sqe_tail - sq->sqe_head;
-	for (submitted = 0; submitted < to_submit; submitted++) {
-		read_barrier();
-		sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
-	}
-	if (!submitted)
-		return 0;
-
-	if (*sq->ktail != ktail) {
-		write_barrier();
-		*sq->ktail = ktail;
-		write_barrier();
-	}
-submit:
-	ret = io_uring_enter(ring->ring_fd, submitted, 0,
-				IORING_ENTER_GETEVENTS, NULL);
-	return ret < 0 ? -errno : ret;
-}
-
-static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
-				      const void *buf, size_t len, int flags)
-{
-	memset(sqe, 0, sizeof(*sqe));
-	sqe->opcode = (__u8) IORING_OP_SEND;
-	sqe->fd = sockfd;
-	sqe->addr = (unsigned long) buf;
-	sqe->len = len;
-	sqe->msg_flags = (__u32) flags;
-}
-
-static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
-				        const void *buf, size_t len, int flags,
-				        unsigned zc_flags)
-{
-	io_uring_prep_send(sqe, sockfd, buf, len, flags);
-	sqe->opcode = (__u8) IORING_OP_SEND_ZC;
-	sqe->ioprio = zc_flags;
-}
-
-static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
-{
-	struct io_uring_sq *sq = &ring->sq;
-
-	if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
-		return NULL;
-	return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
-}
-
-static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
-{
-	struct io_uring_cq *cq = &ring->cq;
-	const unsigned mask = *cq->kring_mask;
-	unsigned head = *cq->khead;
-	int ret;
-
-	*cqe_ptr = NULL;
-	do {
-		read_barrier();
-		if (head != *cq->ktail) {
-			*cqe_ptr = &cq->cqes[head & mask];
-			break;
-		}
-		ret = io_uring_enter(ring->ring_fd, 0, 1,
-					IORING_ENTER_GETEVENTS, NULL);
-		if (ret < 0)
-			return -errno;
-	} while (1);
-
-	return 0;
-}
-
-static inline void io_uring_cqe_seen(struct io_uring *ring)
-{
-	*(&ring->cq)->khead += 1;
-	write_barrier();
-}
-
 static unsigned long gettimeofday_ms(void)
 {
 	struct timeval tv;
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 85a8ee9395b3..95b498efacd1 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -182,23 +182,6 @@ chk_msk_inuse()
 	__chk_nr get_msk_inuse $expected "$msg" 0
 }
 
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex i
-
-	port_hex="$(printf "%04X" "${port}")"
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
-}
-
 wait_connected()
 {
 	local listener_ns="${1}"
@@ -222,7 +205,7 @@ echo "a" | \
 		ip netns exec $ns \
 			./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
 				0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10000
+mptcp_lib_wait_local_port_listen $ns 10000
 chk_msk_nr 0 "no msk on netns creation"
 chk_msk_listen 10000
 
@@ -245,7 +228,7 @@ echo "a" | \
 		ip netns exec $ns \
 			./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
 				0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10001
+mptcp_lib_wait_local_port_listen $ns 10001
 echo "b" | \
 	timeout ${timeout_test} \
 		ip netns exec $ns \
@@ -266,7 +249,7 @@ for I in `seq 1 $NR_CLIENTS`; do
 				./mptcp_connect -p $((I+10001)) -l -w 20 \
 					-t ${timeout_poll} 0.0.0.0 >/dev/null &
 done
-wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
 
 for I in `seq 1 $NR_CLIENTS`; do
 	echo "b" | \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 61a2a1988ce6..7898d62fce0b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -254,31 +254,6 @@ else
 	set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
 fi
 
-print_file_err()
-{
-	ls -l "$1" 1>&2
-	echo "Trailing bytes are: "
-	tail -c 27 "$1"
-}
-
-check_transfer()
-{
-	local in=$1
-	local out=$2
-	local what=$3
-
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
-		echo "[ FAIL ] $what does not match (in, out):"
-		print_file_err "$in"
-		print_file_err "$out"
-
-		return 1
-	fi
-
-	return 0
-}
-
 check_mptcp_disabled()
 {
 	local disabled_ns="ns_disabled-$rndh"
@@ -310,12 +285,6 @@ check_mptcp_disabled()
 	return 0
 }
 
-# $1: IP address
-is_v6()
-{
-	[ -z "${1##*:*}" ]
-}
-
 do_ping()
 {
 	local listener_ns="$1"
@@ -324,7 +293,7 @@ do_ping()
 	local ping_args="-q -c 1"
 	local rc=0
 
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		$ipv6 || return 0
 		ping_args="${ping_args} -6"
 	fi
@@ -341,38 +310,6 @@ do_ping()
 	return 0
 }
 
-# $1: ns, $2: MIB counter
-get_mib_counter()
-{
-	local listener_ns="${1}"
-	local mib="${2}"
-
-	# strip the header
-	ip netns exec "${listener_ns}" \
-		nstat -z -a "${mib}" | \
-			tail -n+2 | \
-			while read a count c rest; do
-				echo $count
-			done
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex i
-
-	port_hex="$(printf "%04X" "${port}")"
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
-}
-
 do_transfer()
 {
 	local listener_ns="$1"
@@ -441,12 +378,12 @@ do_transfer()
 			nstat -n
 	fi
 
-	local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
-	local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+	local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+	local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
 
 	timeout ${timeout_test} \
 		ip netns exec ${listener_ns} \
@@ -454,7 +391,7 @@ do_transfer()
 				$extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
-	wait_local_port_listen "${listener_ns}" "${port}"
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
@@ -504,16 +441,16 @@ do_transfer()
 		return 1
 	fi
 
-	check_transfer $sin $cout "file received by client"
+	mptcp_lib_check_transfer $sin $cout "file received by client"
 	retc=$?
-	check_transfer $cin $sout "file received by server"
+	mptcp_lib_check_transfer $cin $sout "file received by server"
 	rets=$?
 
-	local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+	local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
 
 	expect_synrx=$((stat_synrx_last_l))
 	expect_ackrx=$((stat_ackrx_last_l))
@@ -542,8 +479,8 @@ do_transfer()
 	fi
 
 	if $checksum; then
-		local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
-		local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+		local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+		local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
 
 		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
 		if [ $csum_err_s_nr -gt 0 ]; then
@@ -613,9 +550,8 @@ make_file()
 	ksize=$((SIZE / 1024))
 	rem=$((SIZE - (ksize * 1024)))
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
-	dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+	mptcp_lib_make_file $name 1024 $ksize
+	dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
 
 	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
 }
@@ -635,12 +571,12 @@ run_tests_lo()
 	fi
 
 	# skip if we don't want v6
-	if ! $ipv6 && is_v6 "${connect_addr}"; then
+	if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
 		return 0
 	fi
 
 	local local_addr
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		local_addr="::"
 	else
 		local_addr="0.0.0.0"
@@ -708,7 +644,7 @@ run_test_transparent()
 	TEST_GROUP="${msg}"
 
 	# skip if we don't want v6
-	if ! $ipv6 && is_v6 "${connect_addr}"; then
+	if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
 		return 0
 	fi
 
@@ -716,7 +652,7 @@ run_test_transparent()
 	# the required infrastructure in MPTCP sockopt code. To support TOS, the
 	# following function has been exported (T). Not great but better than
 	# checking for a specific kernel version.
-	if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then
+	if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
 		echo "INFO: ${msg} not supported by the kernel: SKIP"
 		mptcp_lib_result_skip "${TEST_GROUP}"
 		return
@@ -741,7 +677,7 @@ EOF
 	fi
 
 	local local_addr
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		local_addr="::"
 		r6flag="-6"
 	else
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index ee1f89a872b3..8362ea454af3 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -511,13 +511,6 @@ get_failed_tests_ids()
 	done | sort -n
 }
 
-print_file_err()
-{
-	ls -l "$1" 1>&2
-	echo -n "Trailing bytes are: "
-	tail -c 27 "$1"
-}
-
 check_transfer()
 {
 	local in=$1
@@ -548,8 +541,8 @@ check_transfer()
 		local sum=$((0${a} + 0${b}))
 		if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
 			fail_test "$what does not match (in, out):"
-			print_file_err "$in"
-			print_file_err "$out"
+			mptcp_lib_print_file_err "$in"
+			mptcp_lib_print_file_err "$out"
 
 			return 1
 		else
@@ -587,49 +580,9 @@ link_failure()
 	done
 }
 
-# $1: IP address
-is_v6()
-{
-	[ -z "${1##*:*}" ]
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex
-	port_hex="$(printf "%04X" "${port}")"
-
-	local i
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
-}
-
-# $1: ns ; $2: counter
-get_counter()
-{
-	local ns="${1}"
-	local counter="${2}"
-	local count
-
-	count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}')
-	if [ -z "${count}" ]; then
-		mptcp_lib_fail_if_expected_feature "${counter} counter"
-		return 1
-	fi
-
-	echo "${count}"
-}
-
 rm_addr_count()
 {
-	get_counter "${1}" "MPTcpExtRmAddr"
+	mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr"
 }
 
 # $1: ns, $2: old rm_addr counter in $ns
@@ -649,7 +602,7 @@ wait_rm_addr()
 
 rm_sf_count()
 {
-	get_counter "${1}" "MPTcpExtRmSubflow"
+	mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow"
 }
 
 # $1: ns, $2: old rm_sf counter in $ns
@@ -672,26 +625,20 @@ wait_mpj()
 	local ns="${1}"
 	local cnt old_cnt
 
-	old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+	old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
 
 	local i
 	for i in $(seq 10); do
-		cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+		cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
 		[ "$cnt" = "${old_cnt}" ] || break
 		sleep 0.1
 	done
 }
 
-kill_wait()
-{
-	kill $1 > /dev/null 2>&1
-	wait $1 2>/dev/null
-}
-
 kill_events_pids()
 {
-	kill_wait $evts_ns1_pid
-	kill_wait $evts_ns2_pid
+	mptcp_lib_kill_wait $evts_ns1_pid
+	mptcp_lib_kill_wait $evts_ns2_pid
 }
 
 kill_tests_wait()
@@ -901,7 +848,7 @@ pm_nl_set_endpoint()
 		local id=10
 		while [ $add_nr_ns1 -gt 0 ]; do
 			local addr
-			if is_v6 "${connect_addr}"; then
+			if mptcp_lib_is_v6 "${connect_addr}"; then
 				addr="dead:beef:$counter::1"
 			else
 				addr="10.0.$counter.1"
@@ -953,7 +900,7 @@ pm_nl_set_endpoint()
 		local id=20
 		while [ $add_nr_ns2 -gt 0 ]; do
 			local addr
-			if is_v6 "${connect_addr}"; then
+			if mptcp_lib_is_v6 "${connect_addr}"; then
 				addr="dead:beef:$counter::2"
 			else
 				addr="10.0.$counter.2"
@@ -995,7 +942,7 @@ pm_nl_set_endpoint()
 			pm_nl_flush_endpoint ${connector_ns}
 		elif [ $rm_nr_ns2 -eq 9 ]; then
 			local addr
-			if is_v6 "${connect_addr}"; then
+			if mptcp_lib_is_v6 "${connect_addr}"; then
 				addr="dead:beef:1::2"
 			else
 				addr="10.0.1.2"
@@ -1117,7 +1064,7 @@ do_transfer()
 	fi
 	local spid=$!
 
-	wait_local_port_listen "${listener_ns}" "${port}"
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	extra_cl_args="$extra_args $extra_cl_args"
 	if [ "$test_linkfail" -eq 0 ];then
@@ -1199,8 +1146,7 @@ make_file()
 	local who=$2
 	local size=$3
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+	mptcp_lib_make_file $name 1024 $size
 
 	print_info "Test file (size $size KB) for $who"
 }
@@ -1284,7 +1230,7 @@ chk_csum_nr()
 	fi
 
 	print_check "sum"
-	count=$(get_counter ${ns1} "MPTcpExtDataCsumErr")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
 	if [ "$count" != "$csum_ns1" ]; then
 		extra_msg="$extra_msg ns1=$count"
 	fi
@@ -1297,7 +1243,7 @@ chk_csum_nr()
 		print_ok
 	fi
 	print_check "csum"
-	count=$(get_counter ${ns2} "MPTcpExtDataCsumErr")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
 	if [ "$count" != "$csum_ns2" ]; then
 		extra_msg="$extra_msg ns2=$count"
 	fi
@@ -1341,7 +1287,7 @@ chk_fail_nr()
 	fi
 
 	print_check "ftx"
-	count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx")
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
 	if [ "$count" != "$fail_tx" ]; then
 		extra_msg="$extra_msg,tx=$count"
 	fi
@@ -1355,7 +1301,7 @@ chk_fail_nr()
 	fi
 
 	print_check "failrx"
-	count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx")
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
 	if [ "$count" != "$fail_rx" ]; then
 		extra_msg="$extra_msg,rx=$count"
 	fi
@@ -1388,7 +1334,7 @@ chk_fclose_nr()
 	fi
 
 	print_check "ctx"
-	count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$fclose_tx" ]; then
@@ -1399,7 +1345,7 @@ chk_fclose_nr()
 	fi
 
 	print_check "fclzrx"
-	count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$fclose_rx" ]; then
@@ -1429,20 +1375,24 @@ chk_rst_nr()
 	fi
 
 	print_check "rtx"
-	count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx")
+	count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
 	if [ -z "$count" ]; then
 		print_skip
-	elif [ $count -lt $rst_tx ]; then
+	# accept more rst than expected except if we don't expect any
+	elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } ||
+	     { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then
 		fail_test "got $count MP_RST[s] TX expected $rst_tx"
 	else
 		print_ok
 	fi
 
 	print_check "rstrx"
-	count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx")
+	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
 	if [ -z "$count" ]; then
 		print_skip
-	elif [ "$count" -lt "$rst_rx" ]; then
+	# accept more rst than expected except if we don't expect any
+	elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } ||
+	     { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then
 		fail_test "got $count MP_RST[s] RX expected $rst_rx"
 	else
 		print_ok
@@ -1458,7 +1408,7 @@ chk_infi_nr()
 	local count
 
 	print_check "itx"
-	count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$infi_tx" ]; then
@@ -1468,7 +1418,7 @@ chk_infi_nr()
 	fi
 
 	print_check "infirx"
-	count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$infi_rx" ]; then
@@ -1497,7 +1447,7 @@ chk_join_nr()
 	fi
 
 	print_check "syn"
-	count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$syn_nr" ]; then
@@ -1508,7 +1458,7 @@ chk_join_nr()
 
 	print_check "synack"
 	with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
-	count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1525,7 +1475,7 @@ chk_join_nr()
 	fi
 
 	print_check "ack"
-	count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$ack_nr" ]; then
@@ -1558,8 +1508,8 @@ chk_stale_nr()
 
 	print_check "stale"
 
-	stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale")
-	recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover")
+	stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale")
+	recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover")
 	if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
 		print_skip
 	elif [ $stale_nr -lt $stale_min ] ||
@@ -1596,7 +1546,7 @@ chk_add_nr()
 	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
 
 	print_check "add"
-	count=$(get_counter ${ns2} "MPTcpExtAddAddr")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
 	if [ -z "$count" ]; then
 		print_skip
 	# if the test configured a short timeout tolerate greater then expected
@@ -1608,7 +1558,7 @@ chk_add_nr()
 	fi
 
 	print_check "echo"
-	count=$(get_counter ${ns1} "MPTcpExtEchoAdd")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$echo_nr" ]; then
@@ -1619,7 +1569,7 @@ chk_add_nr()
 
 	if [ $port_nr -gt 0 ]; then
 		print_check "pt"
-		count=$(get_counter ${ns2} "MPTcpExtPortAdd")
+		count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$port_nr" ]; then
@@ -1629,7 +1579,7 @@ chk_add_nr()
 		fi
 
 		print_check "syn"
-		count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$syn_nr" ]; then
@@ -1640,7 +1590,7 @@ chk_add_nr()
 		fi
 
 		print_check "synack"
-		count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+		count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1651,7 +1601,7 @@ chk_add_nr()
 		fi
 
 		print_check "ack"
-		count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$ack_nr" ]; then
@@ -1662,7 +1612,7 @@ chk_add_nr()
 		fi
 
 		print_check "syn"
-		count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$mis_syn_nr" ]; then
@@ -1673,7 +1623,7 @@ chk_add_nr()
 		fi
 
 		print_check "ack"
-		count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+		count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
 		if [ -z "$count" ]; then
 			print_skip
 		elif [ "$count" != "$mis_ack_nr" ]; then
@@ -1695,7 +1645,7 @@ chk_add_tx_nr()
 	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
 
 	print_check "add TX"
-	count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
 	if [ -z "$count" ]; then
 		print_skip
 	# if the test configured a short timeout tolerate greater then expected
@@ -1707,7 +1657,7 @@ chk_add_tx_nr()
 	fi
 
 	print_check "echo TX"
-	count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$echo_tx_nr" ]; then
@@ -1745,7 +1695,7 @@ chk_rm_nr()
 	fi
 
 	print_check "rm"
-	count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
+	count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$rm_addr_nr" ]; then
@@ -1755,18 +1705,21 @@ chk_rm_nr()
 	fi
 
 	print_check "rmsf"
-	count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+	count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ -n "$simult" ]; then
 		local cnt suffix
 
-		cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow")
+		cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow")
 
 		# in case of simult flush, the subflow removal count on each side is
 		# unreliable
 		count=$((count + cnt))
-		[ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+		if [ "$count" != "$rm_subflow_nr" ]; then
+			suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+			extra_msg="$extra_msg simult"
+		fi
 		if [ $count -ge "$rm_subflow_nr" ] && \
 		   [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
 			print_ok "$suffix"
@@ -1787,7 +1740,7 @@ chk_rm_tx_nr()
 	local rm_addr_tx_nr=$1
 
 	print_check "rm TX"
-	count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$rm_addr_tx_nr" ]; then
@@ -1804,7 +1757,7 @@ chk_prio_nr()
 	local count
 
 	print_check "ptx"
-	count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$mp_prio_nr_tx" ]; then
@@ -1814,7 +1767,7 @@ chk_prio_nr()
 	fi
 
 	print_check "prx"
-	count=$(get_counter ${ns1} "MPTcpExtMPPrioRx")
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
 	if [ -z "$count" ]; then
 		print_skip
 	elif [ "$count" != "$mp_prio_nr_rx" ]; then
@@ -1860,12 +1813,10 @@ chk_mptcp_info()
 	local cnt2
 	local dump_stats
 
-	print_check "mptcp_info ${info1:0:8}=$exp1:$exp2"
+	print_check "mptcp_info ${info1:0:15}=$exp1:$exp2"
 
-	cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" |
-	       sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q')
-	cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" |
-	       sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q')
+	cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1")
+	cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2")
 	# 'ss' only display active connections and counters that are not 0.
 	[ -z "$cnt1" ] && cnt1=0
 	[ -z "$cnt2" ] && cnt2=0
@@ -1883,6 +1834,42 @@ chk_mptcp_info()
 	fi
 }
 
+# $1: subflows in ns1 ; $2: subflows in ns2
+# number of all subflows, including the initial subflow.
+chk_subflows_total()
+{
+	local cnt1
+	local cnt2
+	local info="subflows_total"
+	local dump_stats
+
+	# if subflows_total counter is supported, use it:
+	if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then
+		chk_mptcp_info $info $1 $info $2
+		return
+	fi
+
+	print_check "$info $1:$2"
+
+	# if not, count the TCP connections that are in fact MPTCP subflows
+	cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv |
+	       grep -c tcp-ulp-mptcp)
+	cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv |
+	       grep -c tcp-ulp-mptcp)
+
+	if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then
+		fail_test "got subflows $cnt1:$cnt2 expected $1:$2"
+		dump_stats=1
+	else
+		print_ok
+	fi
+
+	if [ "$dump_stats" = 1 ]; then
+		ss -N $ns1 -ti
+		ss -N $ns2 -ti
+	fi
+}
+
 chk_link_usage()
 {
 	local ns=$1
@@ -1914,7 +1901,7 @@ wait_attempt_fail()
 	while [ $time -lt $timeout_ms ]; do
 		local cnt
 
-		cnt=$(get_counter ${ns} "TcpAttemptFails")
+		cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails")
 
 		[ "$cnt" = 1 ] && return 1
 		time=$((time + 100))
@@ -2305,6 +2292,7 @@ remove_tests()
 		chk_join_nr 1 1 1
 		chk_rm_tx_nr 1
 		chk_rm_nr 1 1
+		chk_rst_nr 0 0
 	fi
 
 	# multiple subflows, remove
@@ -2317,6 +2305,7 @@ remove_tests()
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 2 2 2
 		chk_rm_nr 2 2
+		chk_rst_nr 0 0
 	fi
 
 	# single address, remove
@@ -2329,6 +2318,7 @@ remove_tests()
 		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_rm_nr 1 1 invert
+		chk_rst_nr 0 0
 	fi
 
 	# subflow and signal, remove
@@ -2342,6 +2332,7 @@ remove_tests()
 		chk_join_nr 2 2 2
 		chk_add_nr 1 1
 		chk_rm_nr 1 1
+		chk_rst_nr 0 0
 	fi
 
 	# subflows and signal, remove
@@ -2356,6 +2347,7 @@ remove_tests()
 		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_rm_nr 2 2
+		chk_rst_nr 0 0
 	fi
 
 	# addresses remove
@@ -2370,6 +2362,7 @@ remove_tests()
 		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 		chk_rm_nr 3 3 invert
+		chk_rst_nr 0 0
 	fi
 
 	# invalid addresses remove
@@ -2384,6 +2377,7 @@ remove_tests()
 		chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
+		chk_rst_nr 0 0
 	fi
 
 	# subflows and signal, flush
@@ -2398,6 +2392,7 @@ remove_tests()
 		chk_join_nr 3 3 3
 		chk_add_nr 1 1
 		chk_rm_nr 1 3 invert simult
+		chk_rst_nr 0 0
 	fi
 
 	# subflows flush
@@ -2417,6 +2412,7 @@ remove_tests()
 		else
 			chk_rm_nr 3 3
 		fi
+		chk_rst_nr 0 0
 	fi
 
 	# addresses flush
@@ -2431,6 +2427,7 @@ remove_tests()
 		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 		chk_rm_nr 3 3 invert simult
+		chk_rst_nr 0 0
 	fi
 
 	# invalid addresses flush
@@ -2445,6 +2442,7 @@ remove_tests()
 		chk_join_nr 1 1 1
 		chk_add_nr 3 3
 		chk_rm_nr 3 1 invert
+		chk_rst_nr 0 0
 	fi
 
 	# remove id 0 subflow
@@ -2456,6 +2454,7 @@ remove_tests()
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 1 1 1
 		chk_rm_nr 1 1
+		chk_rst_nr 0 0
 	fi
 
 	# remove id 0 address
@@ -2468,6 +2467,7 @@ remove_tests()
 		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_rm_nr 1 1 invert
+		chk_rst_nr 0 0 invert
 	fi
 }
 
@@ -2794,6 +2794,7 @@ backup_tests()
 	fi
 }
 
+SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
 LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
 LISTENER_CLOSED=16  #MPTCP_EVENT_LISTENER_CLOSED
 
@@ -2828,13 +2829,13 @@ verify_listener_events()
 		return
 	fi
 
-	type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
-	family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
-	sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+	type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")
+	family=$(mptcp_lib_evts_get_info family "$evt" "$e_type")
+	sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type")
 	if [ $family ] && [ $family = $AF_INET6 ]; then
-		saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+		saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type")
 	else
-		saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+		saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type")
 	fi
 
 	if [ $type ] && [ $type = $e_type ] &&
@@ -3220,7 +3221,7 @@ fastclose_tests()
 	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
 		test_linkfail=1024 fastclose=server \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 0 0 0
+		chk_join_nr 0 0 0 0 0 0 1
 		chk_fclose_nr 1 1 invert
 		chk_rst_nr 1 1
 	fi
@@ -3229,8 +3230,7 @@ fastclose_tests()
 pedit_action_pkts()
 {
 	tc -n $ns2 -j -s action show action pedit index 100 | \
-		grep "packets" | \
-		sed 's/.*"packets":\([0-9]\+\),.*/\1/'
+		mptcp_lib_get_info_value \"packets\" packets
 }
 
 fail_tests()
@@ -3255,69 +3255,70 @@ fail_tests()
 	fi
 }
 
+# $1: ns ; $2: addr ; $3: id
 userspace_pm_add_addr()
 {
-	local addr=$1
-	local id=$2
+	local evts=$evts_ns1
 	local tk
 
-	tk=$(grep "type:1," "$evts_ns1" |
-	     sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
-	ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
 	sleep 1
 }
 
-userspace_pm_rm_sf_addr_ns1()
+# $1: ns ; $2: id
+userspace_pm_rm_addr()
 {
-	local addr=$1
-	local id=$2
-	local tk sp da dp
-
-	tk=$(grep "type:1," "$evts_ns1" |
-	     sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
-	sp=$(grep "type:10" "$evts_ns1" |
-	     sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
-	da=$(grep "type:10" "$evts_ns1" |
-	     sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
-	dp=$(grep "type:10" "$evts_ns1" |
-	     sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
-	ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id
-	ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \
-				lport $sp rip $da rport $dp token $tk
-	wait_rm_addr $ns1 1
-	wait_rm_sf $ns1 1
+	local evts=$evts_ns1
+	local tk
+	local cnt
+
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+
+	cnt=$(rm_addr_count ${1})
+	ip netns exec $1 ./pm_nl_ctl rem token $tk id $2
+	wait_rm_addr $1 "${cnt}"
 }
 
+# $1: ns ; $2: addr ; $3: id
 userspace_pm_add_sf()
 {
-	local addr=$1
-	local id=$2
+	local evts=$evts_ns1
 	local tk da dp
 
-	tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
-	da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
-	dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
-	ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+	da=$(mptcp_lib_evts_get_info daddr4 "$evts")
+	dp=$(mptcp_lib_evts_get_info dport "$evts")
+
+	ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
 				rip $da rport $dp token $tk
 	sleep 1
 }
 
-userspace_pm_rm_sf_addr_ns2()
+# $1: ns ; $2: addr $3: event type
+userspace_pm_rm_sf()
 {
-	local addr=$1
-	local id=$2
+	local evts=$evts_ns1
+	local t=${3:-1}
+	local ip=4
 	local tk da dp sp
+	local cnt
 
-	tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
-	da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
-	dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
-	sp=$(grep "type:10" "$evts_ns2" |
-	     sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
-	ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id
-	ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \
+	[ "$1" == "$ns2" ] && evts=$evts_ns2
+	if mptcp_lib_is_v6 $2; then ip=6; fi
+	tk=$(mptcp_lib_evts_get_info token "$evts")
+	da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t)
+	dp=$(mptcp_lib_evts_get_info dport "$evts" $t)
+	sp=$(mptcp_lib_evts_get_info sport "$evts" $t)
+
+	cnt=$(rm_sf_count ${1})
+	ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
 				rip $da rport $dp token $tk
-	wait_rm_addr $ns2 1
-	wait_rm_sf $ns2 1
+	wait_rm_sf $1 "${cnt}"
 }
 
 userspace_tests()
@@ -3400,18 +3401,21 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 1 1
-		speed=10 \
+		speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 &
 		local tests_pid=$!
 		wait_mpj $ns1
-		userspace_pm_add_addr 10.0.2.1 10
+		userspace_pm_add_addr $ns1 10.0.2.1 10
 		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
 		chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
-		userspace_pm_rm_sf_addr_ns1 10.0.2.1 10
+		userspace_pm_rm_addr $ns1 10
+		userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
 		chk_rm_nr 1 1 invert
 		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
 		kill_events_pids
 		wait $tests_pid
 	fi
@@ -3421,16 +3425,88 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		speed=10 \
+		speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 &
 		local tests_pid=$!
 		wait_mpj $ns2
-		userspace_pm_add_sf 10.0.3.2 20
+		userspace_pm_add_sf $ns2 10.0.3.2 20
 		chk_join_nr 1 1 1
 		chk_mptcp_info subflows 1 subflows 1
-		userspace_pm_rm_sf_addr_ns2 10.0.3.2 20
+		chk_subflows_total 2 2
+		userspace_pm_rm_addr $ns2 20
+		userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED
 		chk_rm_nr 1 1
 		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
+		kill_events_pids
+		wait $tests_pid
+	fi
+
+	# userspace pm create id 0 subflow
+	if reset_with_events "userspace pm create id 0 subflow" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns2
+		chk_mptcp_info subflows 0 subflows 0
+		chk_subflows_total 1 1
+		userspace_pm_add_sf $ns2 10.0.3.2 0
+		chk_join_nr 1 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		kill_events_pids
+		wait $tests_pid
+	fi
+
+	# userspace pm remove initial subflow
+	if reset_with_events "userspace pm remove initial subflow" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns2
+		userspace_pm_add_sf $ns2 10.0.3.2 20
+		chk_join_nr 1 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		userspace_pm_rm_sf $ns2 10.0.1.2
+		# we don't look at the counter linked to the RM_ADDR but
+		# to the one linked to the subflows that have been removed
+		chk_rm_nr 0 1
+		chk_rst_nr 0 0 invert
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 1 1
+		kill_events_pids
+		wait $tests_pid
+	fi
+
+	# userspace pm send RM_ADDR for ID 0
+	if reset_with_events "userspace pm send RM_ADDR for ID 0" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns2 1 1
+		speed=5 \
+			run_tests $ns1 $ns2 10.0.1.1 &
+		local tests_pid=$!
+		wait_mpj $ns1
+		userspace_pm_add_addr $ns1 10.0.2.1 10
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+		chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+		userspace_pm_rm_addr $ns1 0
+		# we don't look at the counter linked to the subflows that
+		# have been removed but to the one linked to the RM_ADDR
+		chk_rm_nr 1 0 invert
+		chk_rst_nr 0 0 invert
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 1 1
 		kill_events_pids
 		wait $tests_pid
 	fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 92a5befe8039..022262a2cfe0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -207,3 +207,94 @@ mptcp_lib_result_print_all_tap() {
 		printf "%s\n" "${subtest}"
 	done
 }
+
+# get the value of keyword $1 in the line marked by keyword $2
+mptcp_lib_get_info_value() {
+	grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+}
+
+# $1: info name ; $2: evts_ns ; $3: event type
+mptcp_lib_evts_get_info() {
+	mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}"
+}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+	[ "${1}" -eq 0 ] && return 0
+
+	kill -SIGUSR1 "${1}" > /dev/null 2>&1
+	kill "${1}" > /dev/null 2>&1
+	wait "${1}" 2>/dev/null
+}
+
+# $1: IP address
+mptcp_lib_is_v6() {
+	[ -z "${1##*:*}" ]
+}
+
+# $1: ns, $2: MIB counter
+mptcp_lib_get_counter() {
+	local ns="${1}"
+	local counter="${2}"
+	local count
+
+	count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+		awk 'NR==1 {next} {print $2}')
+	if [ -z "${count}" ]; then
+		mptcp_lib_fail_if_expected_feature "${counter} counter"
+		return 1
+	fi
+
+	echo "${count}"
+}
+
+mptcp_lib_make_file() {
+	local name="${1}"
+	local bs="${2}"
+	local size="${3}"
+
+	dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
+}
+
+# $1: file
+mptcp_lib_print_file_err() {
+	ls -l "${1}" 1>&2
+	echo "Trailing bytes are: "
+	tail -c 27 "${1}"
+}
+
+# $1: input file ; $2: output file ; $3: what kind of file
+mptcp_lib_check_transfer() {
+	local in="${1}"
+	local out="${2}"
+	local what="${3}"
+
+	if ! cmp "$in" "$out" > /dev/null 2>&1; then
+		echo "[ FAIL ] $what does not match (in, out):"
+		mptcp_lib_print_file_err "$in"
+		mptcp_lib_print_file_err "$out"
+
+		return 1
+	fi
+
+	return 0
+}
+
+# $1: ns, $2: port
+mptcp_lib_wait_local_port_listen() {
+	local listener_ns="${1}"
+	local port="${2}"
+
+	local port_hex
+	port_hex="$(printf "%04X" "${port}")"
+
+	local _
+	for _ in $(seq 10); do
+		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
+			     {rc=0; exit}} END {exit rc}" &&
+			break
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 8c8694f21e7d..c643872ddf47 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -11,7 +11,6 @@ cout=""
 ksft_skip=4
 timeout_poll=30
 timeout_test=$((timeout_poll * 2 + 1))
-mptcp_connect=""
 iptables="iptables"
 ip6tables="ip6tables"
 
@@ -136,38 +135,6 @@ check_mark()
 	return 0
 }
 
-print_file_err()
-{
-	ls -l "$1" 1>&2
-	echo "Trailing bytes are: "
-	tail -c 27 "$1"
-}
-
-check_transfer()
-{
-	local in=$1
-	local out=$2
-	local what=$3
-
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
-		echo "[ FAIL ] $what does not match (in, out):"
-		print_file_err "$in"
-		print_file_err "$out"
-		ret=1
-
-		return 1
-	fi
-
-	return 0
-}
-
-# $1: IP address
-is_v6()
-{
-	[ -z "${1##*:*}" ]
-}
-
 do_transfer()
 {
 	local listener_ns="$1"
@@ -184,7 +151,7 @@ do_transfer()
 	local mptcp_connect="./mptcp_connect -r 20"
 
 	local local_addr ip
-	if is_v6 "${connect_addr}"; then
+	if mptcp_lib_is_v6 "${connect_addr}"; then
 		local_addr="::"
 		ip=ipv6
 	else
@@ -239,7 +206,7 @@ do_transfer()
 		check_mark $connector_ns 4 || retc=1
 	fi
 
-	check_transfer $cin $sout "file received by server"
+	mptcp_lib_check_transfer $cin $sout "file received by server"
 	rets=$?
 
 	mptcp_lib_result_code "${retc}" "mark ${ip}"
@@ -258,8 +225,7 @@ make_file()
 	local who=$2
 	local size=$3
 
-	dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+	mptcp_lib_make_file $name 1024 $size
 
 	echo "Created $name (size $size KB) containing data sent by $who"
 }
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index ce9203b817f8..ae8ad5d6fb9d 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -123,23 +123,6 @@ setup()
 	grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
 }
 
-# $1: ns, $2: port
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-
-	local port_hex i
-
-	port_hex="$(printf "%04X" "${port}")"
-	for i in $(seq 10); do
-		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
-			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
-			break
-		sleep 0.1
-	done
-}
-
 do_transfer()
 {
 	local cin=$1
@@ -179,7 +162,7 @@ do_transfer()
 				0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
-	wait_local_port_listen "${ns3}" "${port}"
+	mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
 
 	timeout ${timeout_test} \
 		ip netns exec ${ns1} \
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b25a3e33eb25..6167837f48e1 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -108,15 +108,6 @@ test_fail()
 	mptcp_lib_result_fail "${test_name}"
 }
 
-kill_wait()
-{
-	[ $1 -eq 0 ] && return 0
-
-	kill -SIGUSR1 $1 > /dev/null 2>&1
-	kill $1 > /dev/null 2>&1
-	wait $1 2>/dev/null
-}
-
 # This function is used in the cleanup trap
 #shellcheck disable=SC2317
 cleanup()
@@ -128,7 +119,7 @@ cleanup()
 	for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
 		   $server_evts_pid $client_evts_pid
 	do
-		kill_wait $pid
+		mptcp_lib_kill_wait $pid
 	done
 
 	local netns
@@ -173,22 +164,12 @@ print_title "Init"
 print_test "Created network namespaces ns1, ns2"
 test_pass
 
-make_file()
-{
-	# Store a chunk of data in a file to transmit over an MPTCP connection
-	local name=$1
-	local ksize=1
-
-	dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
-	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
-}
-
 make_connection()
 {
 	if [ -z "$file" ]; then
 		file=$(mktemp)
 	fi
-	make_file "$file" "client"
+	mptcp_lib_make_file "$file" 2 1
 
 	local is_v6=$1
 	local app_port=$app4_port
@@ -210,7 +191,7 @@ make_connection()
 	fi
 	:>"$client_evts"
 	if [ $client_evts_pid -ne 0 ]; then
-		kill_wait $client_evts_pid
+		mptcp_lib_kill_wait $client_evts_pid
 	fi
 	ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
 	client_evts_pid=$!
@@ -219,7 +200,7 @@ make_connection()
 	fi
 	:>"$server_evts"
 	if [ $server_evts_pid -ne 0 ]; then
-		kill_wait $server_evts_pid
+		mptcp_lib_kill_wait $server_evts_pid
 	fi
 	ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
 	server_evts_pid=$!
@@ -247,14 +228,11 @@ make_connection()
 	local server_token
 	local server_serverside
 
-	client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
-	client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
-	client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
-				      "$client_evts")
-	server_token=$(grep "type:1," "$server_evts" |
-		       sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
-	server_serverside=$(grep "type:1," "$server_evts" |
-			    sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
+	client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+	client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+	client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+	server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+	server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
 
 	print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
 	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
@@ -340,16 +318,16 @@ verify_announce_event()
 	local dport
 	local id
 
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
 	if [ "$e_af" = "v6" ]
 	then
-		addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+		addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
 	else
-		addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+		addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
 	fi
-	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
 
 	check_expected "type" "token" "addr" "dport" "id"
 }
@@ -367,7 +345,7 @@ test_announce()
 	   $client_addr_id dev ns2eth1 > /dev/null 2>&1
 
 	local type
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
 	print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
 	if [ "$type" = "" ]
 	then
@@ -446,9 +424,9 @@ verify_remove_event()
 	local token
 	local id
 
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
 
 	check_expected "type" "token" "id"
 }
@@ -466,7 +444,7 @@ test_remove()
 	   $client_addr_id > /dev/null 2>&1
 	print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
 	local type
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
 	if [ "$type" = "" ]
 	then
 		test_pass
@@ -479,7 +457,7 @@ test_remove()
 	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
 	   $invalid_id > /dev/null 2>&1
 	print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	type=$(mptcp_lib_evts_get_info type "$server_evts")
 	if [ "$type" = "" ]
 	then
 		test_pass
@@ -583,19 +561,19 @@ verify_subflow_events()
 		fi
 	fi
 
-	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
-	remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+	token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+	family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+	dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+	locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+	remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
 	if [ "$family" = "$AF_INET6" ]
 	then
-		saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
-		daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+		saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
 	else
-		saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
-		daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+		saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+		daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
 	fi
 
 	check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
@@ -627,10 +605,10 @@ test_subflows()
 			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
 
 	# Delete the listener from the client ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
 	local sport
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW from server to client machine
 	:>"$server_evts"
@@ -666,9 +644,9 @@ test_subflows()
 			      "$client_addr_id" "ns1" "ns2"
 
 	# Delete the listener from the client ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW6 from server to client machine
 	:>"$server_evts"
@@ -705,9 +683,9 @@ test_subflows()
 			      "$client_addr_id" "ns1" "ns2"
 
 	# Delete the listener from the client ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW from server to client machine
 	:>"$server_evts"
@@ -743,9 +721,9 @@ test_subflows()
 			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
 
 	# Delete the listener from the server ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW from client to server machine
 	:>"$client_evts"
@@ -782,9 +760,9 @@ test_subflows()
 			      "$server_addr_id" "ns2" "ns1"
 
 	# Delete the listener from the server ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW6 from client to server machine
 	:>"$client_evts"
@@ -819,9 +797,9 @@ test_subflows()
 			      "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
 
 	# Delete the listener from the server ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW from client to server machine
 	:>"$client_evts"
@@ -865,9 +843,9 @@ test_subflows_v4_v6_mix()
 			      "$server_addr_id" "ns2" "ns1"
 
 	# Delete the listener from the server ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
-	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
 
 	# DESTROY_SUBFLOW from client to server machine
 	:>"$client_evts"
@@ -896,9 +874,10 @@ test_prio()
 
 	# Check TX
 	print_test "MP_PRIO TX"
-	count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
-	[ -z "$count" ] && count=0
-	if [ $count != 1 ]; then
+	count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx")
+	if [ -z "$count" ]; then
+		test_skip
+	elif [ $count != 1 ]; then
 		test_fail "Count != 1: ${count}"
 	else
 		test_pass
@@ -906,9 +885,10 @@ test_prio()
 
 	# Check RX
 	print_test "MP_PRIO RX"
-	count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
-	[ -z "$count" ] && count=0
-	if [ $count != 1 ]; then
+	count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx")
+	if [ -z "$count" ]; then
+		test_skip
+	elif [ $count != 1 ]; then
 		test_fail "Count != 1: ${count}"
 	else
 		test_pass
@@ -933,18 +913,13 @@ verify_listener_events()
 		print_test "CLOSE_LISTENER $e_saddr:$e_sport"
 	fi
 
-	type=$(grep "type:$e_type," $evt |
-	       sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
-	family=$(grep "type:$e_type," $evt |
-		 sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
-	sport=$(grep "type:$e_type," $evt |
-		sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+	type=$(mptcp_lib_evts_get_info type $evt $e_type)
+	family=$(mptcp_lib_evts_get_info family $evt $e_type)
+	sport=$(mptcp_lib_evts_get_info sport $evt $e_type)
 	if [ $family ] && [ $family = $AF_INET6 ]; then
-		saddr=$(grep "type:$e_type," $evt |
-			sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+		saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type)
 	else
-		saddr=$(grep "type:$e_type," $evt |
-			sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+		saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type)
 	fi
 
 	check_expected "type" "family" "saddr" "sport"
@@ -982,7 +957,7 @@ test_listener()
 	sleep 0.5
 
 	# Delete the listener from the client ns, if one was created
-	kill_wait $listener_pid
+	mptcp_lib_kill_wait $listener_pid
 
 	sleep 0.5
 	verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100755
index 000000000000..4fe0befa13fb
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local port_hex
+	local i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
+		   grep -q "${port_hex}"; then
+			break
+		fi
+		sleep 0.1
+	done
+}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
new file mode 100755
index 000000000000..7d3d3fc99461
--- /dev/null
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -o pipefail
+
+NS=netns-name-test
+DEV=dummy-dev0
+DEV2=dummy-dev1
+ALT_NAME=some-alt-name
+
+RET_CODE=0
+
+cleanup() {
+    ip netns del $NS
+}
+
+trap cleanup EXIT
+
+fail() {
+    echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+    RET_CODE=1
+}
+
+ip netns add $NS
+
+#
+# Test basic move without a rename
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 ||
+    fail "Can't perform a netns move"
+ip link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip link del $DEV || fail
+
+#
+# Test move with a conflict
+#
+ip link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 2> /dev/null &&
+    fail "Performed a netns move with a name conflict"
+ip link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $NS link del $DEV || fail
+ip link del $DEV || fail
+
+#
+# Test move with a conflict and rename
+#
+ip link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 name $DEV2 ||
+    fail "Can't perform a netns move with rename"
+ip link del $DEV2 || fail
+ip link del $DEV || fail
+
+#
+# Test dup alt-name with netns move
+#
+ip link add name $DEV type dummy || fail
+ip link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link add name $DEV2 type dummy || fail
+ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail
+
+ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null &&
+    fail "Moved with alt-name dup"
+
+ip link del $DEV || fail
+ip -netns $NS link del $DEV2 || fail
+
+#
+# Test creating alt-name in one net-ns and using in another
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link set dev $DEV netns 1 || fail
+ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
+ip  -netns $NS link show dev $ALT_NAME 2> /dev/null &&
+    fail "Can still find alt-name after move"
+ip link del $DEV || fail
+
+echo -ne "$(basename $0) \t\t\t\t"
+if [ $RET_CODE -eq 0 ]; then
+    echo "[  OK  ]"
+else
+    echo "[ FAIL ]"
+fi
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 9c2012d70b08..f8499d4c87f3 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -3,6 +3,8 @@
 #
 # OVS kernel module self tests
 
+trap ovs_exit_sig EXIT TERM INT ERR
+
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
@@ -142,6 +144,12 @@ ovs_add_flow () {
 	return 0
 }
 
+ovs_del_flows () {
+	info "Deleting all flows from DP: sbx:$1 br:$2"
+	ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2"
+	return 0
+}
+
 ovs_drop_record_and_run () {
 	local sbx=$1
 	shift
@@ -198,6 +206,17 @@ test_drop_reason() {
 	ip netns exec server ip addr add 172.31.110.20/24 dev s1
 	ip netns exec server ip link set s1 up
 
+	# Check if drop reasons can be sent
+	ovs_add_flow "test_drop_reason" dropreason \
+		'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null
+	if [ $? == 1 ]; then
+		info "no support for drop reasons - skipping"
+		ovs_exit_sig
+		return $ksft_skip
+	fi
+
+	ovs_del_flows "test_drop_reason" dropreason
+
 	# Allow ARP
 	ovs_add_flow "test_drop_reason" dropreason \
 		'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
@@ -525,7 +544,7 @@ run_test() {
 	fi
 
 	if python3 ovs-dpctl.py -h 2>&1 | \
-	     grep "Need to install the python" >/dev/null 2>&1; then
+	     grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
 		stdbuf -o0 printf "TEST: %-60s  [PYLIB]\n" "${tdesc}"
 		return $ksft_skip
 	fi
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 912dc8c49085..b97e621face9 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -28,8 +28,10 @@ try:
     from pyroute2.netlink import nlmsg_atoms
     from pyroute2.netlink.exceptions import NetlinkError
     from pyroute2.netlink.generic import GenericNetlinkSocket
+    import pyroute2
+
 except ModuleNotFoundError:
-    print("Need to install the python pyroute2 package.")
+    print("Need to install the python pyroute2 package >= 0.6.")
     sys.exit(0)
 
 
@@ -1117,12 +1119,14 @@ class ovskey(nla):
                 "src",
                 lambda x: str(ipaddress.IPv4Address(x)),
                 int,
+                convert_ipv4,
             ),
             (
                 "dst",
                 "dst",
-                lambda x: str(ipaddress.IPv6Address(x)),
+                lambda x: str(ipaddress.IPv4Address(x)),
                 int,
+                convert_ipv4,
             ),
             ("tp_src", "tp_src", "%d", int),
             ("tp_dst", "tp_dst", "%d", int),
@@ -1904,6 +1908,32 @@ class OvsFlow(GenericNetlinkSocket):
             raise ne
         return reply
 
+    def del_flows(self, dpifindex):
+        """
+        Send a del message to the kernel that will drop all flows.
+
+        dpifindex should be a valid datapath obtained by calling
+        into the OvsDatapath lookup
+        """
+
+        flowmsg = OvsFlow.ovs_flow_msg()
+        flowmsg["cmd"] = OVS_FLOW_CMD_DEL
+        flowmsg["version"] = OVS_DATAPATH_VERSION
+        flowmsg["reserved"] = 0
+        flowmsg["dpifindex"] = dpifindex
+
+        try:
+            reply = self.nlm_request(
+                flowmsg,
+                msg_type=self.prid,
+                msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+            )
+            reply = reply[0]
+        except NetlinkError as ne:
+            print(flowmsg)
+            raise ne
+        return reply
+
     def dump(self, dpifindex, flowspec=None):
         """
         Returns a list of messages containing flows.
@@ -1998,6 +2028,12 @@ def main(argv):
     nlmsg_atoms.ovskey = ovskey
     nlmsg_atoms.ovsactions = ovsactions
 
+    # version check for pyroute2
+    prverscheck = pyroute2.__version__.split(".")
+    if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
+        print("Need to upgrade the python pyroute2 package to >= 0.6.")
+        sys.exit(0)
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "-v",
@@ -2060,6 +2096,9 @@ def main(argv):
     addflcmd.add_argument("flow", help="Flow specification")
     addflcmd.add_argument("acts", help="Flow actions")
 
+    delfscmd = subparsers.add_parser("del-flows")
+    delfscmd.add_argument("flsbr", help="Datapath name")
+
     args = parser.parse_args()
 
     if args.verbose > 0:
@@ -2143,6 +2182,11 @@ def main(argv):
         flow = OvsFlow.ovs_flow_msg()
         flow.parse(args.flow, args.acts, rep["dpifindex"])
         ovsflow.add_flow(rep["dpifindex"], flow)
+    elif hasattr(args, "flsbr"):
+        rep = ovsdp.info(args.flsbr, 0)
+        if rep is None:
+            print("DP '%s' not found." % args.flsbr)
+        ovsflow.del_flows(rep["dpifindex"])
 
     return 0
 
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..b3b2dc5a630c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -2048,7 +2048,7 @@ run_test() {
 	case $ret in
 		0)
 			all_skipped=false
-			[ $exitcode=$ksft_skip ] && exitcode=0
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
 		;;
 		$ksft_skip)
 			[ $all_skipped = true ] && exitcode=$ksft_skip
diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
index 116bfeab72fa..e08701c750e3 100755
--- a/tools/testing/selftests/net/route_localnet.sh
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -18,8 +18,10 @@ setup() {
     ip route del 127.0.0.0/8 dev lo table local
     ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
 
-    ifconfig veth0 127.25.3.4/24 up
-    ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+    ip address add 127.25.3.4/24 dev veth0
+    ip link set dev veth0 up
+    ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1
+    ip netns exec "${PEER_NS}" ip link set dev veth1 up
 
     ip route flush cache
     ip netns exec "${PEER_NS}" ip route flush cache
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 5f2b3f6c0d74..38be9706c45f 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -859,7 +859,7 @@ kci_test_gretap()
 
 
 	run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
-	run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+	run_cmd ip -netns "$testns" link set dev $DEV_NS up
 	run_cmd ip -netns "$testns" link del "$DEV_NS"
 
 	# test external mode
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 31e5f0f8859d..6e996f8063cd 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -337,62 +337,62 @@ basic_common()
 	# Basic add, replace and delete behavior.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
 	log_test $? 0 "MDB entry addition"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
 	log_test $? 0 "MDB entry presence after addition"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
 	log_test $? 0 "MDB entry replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
 	log_test $? 0 "MDB entry presence after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
 	log_test $? 0 "MDB entry deletion"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
-	log_test $? 1 "MDB entry presence after deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+	log_test $? 254 "MDB entry presence after deletion"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
 	log_test $? 255 "Non-existent MDB entry deletion"
 
 	# Default protocol and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\""
 	log_test $? 0 "MDB entry default protocol"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\""
 	log_test $? 0 "MDB entry protocol replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
 
 	# Default destination port and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \""
 	log_test $? 1 "MDB entry default destination port"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\""
 	log_test $? 0 "MDB entry destination port replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
 
 	# Default destination VNI and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \""
 	log_test $? 1 "MDB entry default destination VNI"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\""
 	log_test $? 0 "MDB entry destination VNI replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
 
 	# Default outgoing interface and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \""
 	log_test $? 1 "MDB entry default outgoing interface"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\""
 	log_test $? 0 "MDB entry outgoing interface replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
@@ -550,127 +550,127 @@ star_g_common()
 	# Basic add, replace and delete behavior.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
 	log_test $? 0 "(*, G) MDB entry addition with source list"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
 	log_test $? 0 "(*, G) MDB entry presence after addition"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry presence after addition"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
 	log_test $? 0 "(*, G) MDB entry replacement with source list"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
 	log_test $? 0 "(*, G) MDB entry presence after replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry presence after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 	log_test $? 0 "(*, G) MDB entry deletion"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
-	log_test $? 1 "(*, G) MDB entry presence after deletion"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
-	log_test $? 1 "(S, G) MDB entry presence after deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+	log_test $? 254 "(*, G) MDB entry presence after deletion"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+	log_test $? 254 "(S, G) MDB entry presence after deletion"
 
 	# Default filter mode and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
 	log_test $? 0 "(*, G) MDB entry default filter mode"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include"
 	log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\""
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\""
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
 	log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\""
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
 	log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\""
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grep grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\""
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
 	log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\""
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 
 	# Default source list and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list"
 	log_test $? 1 "(*, G) MDB entry default source list"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry of 1st source after removing source"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
-	log_test $? 1 "(S, G) MDB entry of 2nd source after removing source"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+	log_test $? 254 "(S, G) MDB entry of 2nd source after removing source"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
 	log_test $? 0 "(S, G) MDB entry of 3rd source after removing source"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 
 	# Default protocol and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\""
 	log_test $? 0 "(*, G) MDB entry default protocol"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\""
 	log_test $? 0 "(S, G) MDB entry default protocol"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\""
 	log_test $? 0 "(*, G) MDB entry protocol after replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\""
 	log_test $? 0 "(S, G) MDB entry protocol after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 
 	# Default destination port and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \""
 	log_test $? 1 "(*, G) MDB entry default destination port"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \""
 	log_test $? 1 "(S, G) MDB entry default destination port"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \""
 	log_test $? 0 "(*, G) MDB entry destination port after replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \""
 	log_test $? 0 "(S, G) MDB entry destination port after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 
 	# Default destination VNI and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \""
 	log_test $? 1 "(*, G) MDB entry default destination VNI"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \""
 	log_test $? 1 "(S, G) MDB entry default destination VNI"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \""
 	log_test $? 0 "(*, G) MDB entry destination VNI after replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \""
 	log_test $? 0 "(S, G) MDB entry destination VNI after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
 
 	# Default outgoing interface and replacement.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \""
 	log_test $? 1 "(*, G) MDB entry default outgoing interface"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \""
 	log_test $? 1 "(S, G) MDB entry default outgoing interface"
 
 	run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \""
 	log_test $? 0 "(*, G) MDB entry outgoing interface after replacement"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \""
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \""
 	log_test $? 0 "(S, G) MDB entry outgoing interface after replacement"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
@@ -772,7 +772,7 @@ sg_common()
 
 	# Default filter mode.
 	run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
-	run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+	run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include"
 	log_test $? 0 "(S, G) MDB entry default filter mode"
 
 	run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
@@ -2296,9 +2296,9 @@ if [ ! -x "$(command -v jq)" ]; then
 	exit $ksft_skip
 fi
 
-bridge mdb help 2>&1 | grep -q "src_vni"
+bridge mdb help 2>&1 | grep -q "get"
 if [ $? -ne 0 ]; then
-   echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support"
+   echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support"
    exit $ksft_skip
 fi
 
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c743752669a..af5dc57c8ce9 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro functional tests.
 
+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -51,8 +53,7 @@ run_one() {
 		echo "ok" || \
 		echo "failed" &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen ${PEER_NS} 8000 udp
 	./udpgso_bench_tx ${tx_args}
 	ret=$?
 	wait $(jobs -p)
@@ -97,7 +98,7 @@ run_one_nat() {
 		echo "ok" || \
 		echo "failed"&
 
-	sleep 0.1
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
 	ret=$?
 	kill -INT $pid
@@ -118,11 +119,9 @@ run_one_2sock() {
 		echo "ok" || \
 		echo "failed" &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 12345 udp
 	./udpgso_bench_tx ${tx_args} -p 12345
-	sleep 0.1
-	# first UDP GSO socket should be closed at this point
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
 	ret=$?
 	wait $(jobs -p)
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 894972877e8b..cb664679b434 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks
 
+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -40,8 +42,7 @@ run_one() {
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
 }
 
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359bed0b9..dd47fa96f6b3 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,6 +3,8 @@
 #
 # Run a series of udpgro benchmarks
 
+source net_helper.sh
+
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
 BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -45,8 +47,7 @@ run_one() {
         echo ${rx_args}
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 
-	# Hack: let bg programs complete the startup
-	sleep 0.2
+	wait_local_port_listen "${PEER_NS}" 8000 udp
 	./udpgso_bench_tx ${tx_args}
 }
 
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index ef90aca4cc96..bced422b78f7 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
 	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
 	ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
 	conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
-	conntrack_sctp_collision.sh
+	conntrack_sctp_collision.sh xt_string.sh
 
 HOSTPKG_CONFIG := pkg-config
 
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
index bb34329e02a7..99ed5bd6e840 100755
--- a/tools/testing/selftests/netfilter/nft_audit.sh
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || {
 	exit $SKIP_RC
 }
 
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
 logfile=$(mktemp)
 rulefile=$(mktemp)
 echo "logging into $logfile"
@@ -93,6 +99,12 @@ do_test 'nft add counter t1 c1' \
 do_test 'nft add counter t2 c1; add counter t2 c2' \
 'table=t2 family=2 entries=2 op=nft_register_obj'
 
+for ((i = 3; i <= 500; i++)); do
+	echo "add counter t2 c$i"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
 # adding/updating quotas
 
 do_test 'nft add quota t1 q1 { 10 bytes }' \
@@ -101,6 +113,12 @@ do_test 'nft add quota t1 q1 { 10 bytes }' \
 do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
 'table=t2 family=2 entries=2 op=nft_register_obj'
 
+for ((i = 3; i <= 500; i++)); do
+	echo "add quota t2 q$i { 10 bytes }"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
 # changing the quota value triggers obj update path
 do_test 'nft add quota t1 q1 { 20 bytes }' \
 'table=t1 family=2 entries=1 op=nft_register_obj'
@@ -150,6 +168,40 @@ done
 do_test 'nft reset set t1 s' \
 'table=t1 family=2 entries=3 op=nft_reset_setelem'
 
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
 # deleting rules
 
 readarray -t handles < <(nft -a list chain t1 c1 | \
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh
new file mode 100755
index 000000000000..1802653a4728
--- /dev/null
+++ b/tools/testing/selftests/netfilter/xt_string.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+if ! iptables --version >/dev/null 2>&1; then
+	echo "SKIP: Test needs iptables"
+	exit $ksft_skip
+fi
+if ! ip -V >/dev/null 2>&1; then
+	echo "SKIP: Test needs iproute2"
+	exit $ksft_skip
+fi
+if ! nc -h >/dev/null 2>&1; then
+	echo "SKIP: Test needs netcat"
+	exit $ksft_skip
+fi
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+ns="ns-$(mktemp -u XXXXXXXX)"
+trap 'ip netns del $ns' EXIT
+ip netns add "$ns"
+ip -net "$ns" link add d0 type dummy
+ip -net "$ns" link set d0 up
+ip -net "$ns" addr add 10.1.2.1/24 dev d0
+
+#ip netns exec "$ns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+
+add_rule() { # (alg, from, to)
+	ip netns exec "$ns" \
+		iptables -A OUTPUT -o d0 -m string \
+			--string "$pattern" --algo $1 --from $2 --to $3
+}
+showrules() { # ()
+	ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+	ip netns exec "$ns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+	showrules | grep -c -- "$*"
+}
+send() { # (offset)
+	( for ((i = 0; i < $1 - $hdrlen; i++)); do
+		printf " "
+	  done
+	  printf "$pattern"
+	) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - $patlen))
+if [ $(countrule -c 0 0) -ne 4 ]; then
+	echo "FAIL: rules match data before --from"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - $patlen))
+if [ $(countrule -c 2) -ne 2 ]; then
+	echo "FAIL: only two rules should match at low offset"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1500 - $patlen))
+if [ $(countrule -c 1) -ne 4 ]; then
+	echo "FAIL: all rules should match at end of packet"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1495
+if [ $(countrule -c 1) -ne 1 ]; then
+	echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1500
+if [ $(countrule -c 1) -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at start of second fragment"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen))
+if [ $(countrule -c 1) -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at end of largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen + 1))
+if [ $(countrule -c 1) -ne 0 ]; then
+	echo "FAIL: no rules should match pattern extending largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1600
+if [ $(countrule -c 1) -ne 0 ]; then
+	echo "FAIL: no rule should match pattern past largest --to"
+	showrules
+	((rc--))
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore
index 52f613cdad54..5119f9f7afd2 100644
--- a/tools/testing/selftests/nolibc/.gitignore
+++ b/tools/testing/selftests/nolibc/.gitignore
@@ -1,4 +1,5 @@
 /initramfs/
+/initramfs.cpio
 /libc-test
 /nolibc-test
 /run.out
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index dfe66776a331..a0fc07253baf 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -82,7 +82,7 @@ QEMU_ARCH_arm        = arm
 QEMU_ARCH_mips       = mipsel  # works with malta_defconfig
 QEMU_ARCH_ppc        = ppc
 QEMU_ARCH_ppc64      = ppc64
-QEMU_ARCH_ppc64le    = ppc64le
+QEMU_ARCH_ppc64le    = ppc64
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
@@ -113,6 +113,7 @@ else
 Q=@
 endif
 
+CFLAGS_i386 = $(call cc-option,-m32)
 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
@@ -131,18 +132,20 @@ REPORT  ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++
 
 help:
 	@echo "Supported targets under selftests/nolibc:"
-	@echo "  all          call the \"run\" target below"
-	@echo "  help         this help"
-	@echo "  sysroot      create the nolibc sysroot here (uses \$$ARCH)"
-	@echo "  nolibc-test  build the executable (uses \$$CC and \$$CROSS_COMPILE)"
-	@echo "  libc-test    build an executable using the compiler's default libc instead"
-	@echo "  run-user     runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
-	@echo "  initramfs    prepare the initramfs with nolibc-test"
-	@echo "  defconfig    create a fresh new default config (uses \$$XARCH)"
-	@echo "  kernel       (re)build the kernel with the initramfs (uses \$$XARCH)"
-	@echo "  run          runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
-	@echo "  rerun        runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
-	@echo "  clean        clean the sysroot, initramfs, build and output files"
+	@echo "  all               call the \"run\" target below"
+	@echo "  help              this help"
+	@echo "  sysroot           create the nolibc sysroot here (uses \$$ARCH)"
+	@echo "  nolibc-test       build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+	@echo "  libc-test         build an executable using the compiler's default libc instead"
+	@echo "  run-user          runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
+	@echo "  initramfs.cpio    prepare the initramfs archive with nolibc-test"
+	@echo "  initramfs         prepare the initramfs tree with nolibc-test"
+	@echo "  defconfig         create a fresh new default config (uses \$$XARCH)"
+	@echo "  kernel            (re)build the kernel (uses \$$XARCH)"
+	@echo "  kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
+	@echo "  run               runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+	@echo "  rerun             runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
+	@echo "  clean             clean the sysroot, initramfs, build and output files"
 	@echo ""
 	@echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
 	@echo ""
@@ -168,17 +171,17 @@ sysroot/$(ARCH)/include:
 	$(Q)mv sysroot/sysroot sysroot/$(ARCH)
 
 ifneq ($(NOLIBC_SYSROOT),0)
-nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
+nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
 	$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
-	  -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc
+	  -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc
 else
-nolibc-test: nolibc-test.c
+nolibc-test: nolibc-test.c nolibc-test-linkage.c
 	$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
-	  -nostdlib -static -include ../../../include/nolibc/nolibc.h $< -lgcc
+	  -nostdlib -static -include ../../../include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc
 endif
 
-libc-test: nolibc-test.c
-	$(QUIET_CC)$(HOSTCC) -o $@ $<
+libc-test: nolibc-test.c nolibc-test-linkage.c
+	$(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
 
 # local libc-test
 run-libc-test: libc-test
@@ -195,6 +198,9 @@ run-user: nolibc-test
 	$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :
 	$(Q)$(REPORT) $(CURDIR)/run.out
 
+initramfs.cpio: kernel nolibc-test
+	$(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(srctree)/usr/gen_init_cpio - > initramfs.cpio
+
 initramfs: nolibc-test
 	$(QUIET_MKDIR)mkdir -p initramfs
 	$(call QUIET_INSTALL, initramfs/init)
@@ -203,17 +209,20 @@ initramfs: nolibc-test
 defconfig:
 	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
 
-kernel: initramfs
+kernel:
+	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME)
+
+kernel-standalone: initramfs
 	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs
 
 # run the tests after building the kernel
-run: kernel
-	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+run: kernel initramfs.cpio
+	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
 	$(Q)$(REPORT) $(CURDIR)/run.out
 
 # re-run the tests from an existing kernel
 rerun:
-	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+	$(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
 	$(Q)$(REPORT) $(CURDIR)/run.out
 
 # report with existing test log
@@ -227,6 +236,8 @@ clean:
 	$(Q)rm -f nolibc-test
 	$(call QUIET_CLEAN, libc-test)
 	$(Q)rm -f libc-test
+	$(call QUIET_CLEAN, initramfs.cpio)
+	$(Q)rm -rf initramfs.cpio
 	$(call QUIET_CLEAN, initramfs)
 	$(Q)rm -rf initramfs
 	$(call QUIET_CLEAN, run.out)
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
new file mode 100644
index 000000000000..5ff4c8a1db2a
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "nolibc-test-linkage.h"
+
+#ifndef NOLIBC
+#include <errno.h>
+#endif
+
+void *linkage_test_errno_addr(void)
+{
+	return &errno;
+}
+
+int linkage_test_constructor_test_value;
+
+__attribute__((constructor))
+static void constructor1(void)
+{
+	linkage_test_constructor_test_value = 2;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+	linkage_test_constructor_test_value *= 3;
+}
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.h b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
new file mode 100644
index 000000000000..c66473070d73
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NOLIBC_TEST_LINKAGE_H
+#define _NOLIBC_TEST_LINKAGE_H
+
+void *linkage_test_errno_addr(void);
+extern int linkage_test_constructor_test_value;
+
+#endif /* _NOLIBC_TEST_LINKAGE_H */
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index fb3bf91462e2..2f10541e6f38 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -41,6 +41,8 @@
 #endif
 #endif
 
+#include "nolibc-test-linkage.h"
+
 /* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
 #define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2)))
 #define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1)
@@ -57,6 +59,9 @@ static int test_argc;
 /* will be used by some test cases as readable file, please don't write it */
 static const char *argv0;
 
+/* will be used by constructor tests */
+static int constructor_test_value;
+
 /* definition of a series of tests */
 struct test {
 	const char *name;              /* test name */
@@ -594,6 +599,19 @@ int expect_strne(const char *expr, int llen, const char *cmp)
 #define CASE_TEST(name) \
 	case __LINE__: llen += printf("%d %s", test, #name);
 
+/* constructors validate that they are executed in definition order */
+__attribute__((constructor))
+static void constructor1(void)
+{
+	constructor_test_value = 1;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+	constructor_test_value *= 2;
+}
+
 int run_startup(int min, int max)
 {
 	int test;
@@ -630,7 +648,9 @@ int run_startup(int min, int max)
 		CASE_TEST(environ_HOME);     EXPECT_PTRNZ(1, getenv("HOME")); break;
 		CASE_TEST(auxv_addr);        EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
 		CASE_TEST(auxv_AT_UID);      EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
-		CASE_TEST(auxv_AT_PAGESZ);   EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break;
+		CASE_TEST(constructor);      EXPECT_EQ(1, constructor_test_value, 2); break;
+		CASE_TEST(linkage_errno);    EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
+		CASE_TEST(linkage_constr);   EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */
@@ -894,14 +914,14 @@ int run_syscall(int min, int max)
 		CASE_TEST(lseek_0);           EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
 		CASE_TEST(mkdir_root);        EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
 		CASE_TEST(mmap_bad);          EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
-		CASE_TEST(munmap_bad);        EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break;
+		CASE_TEST(munmap_bad);        EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
 		CASE_TEST(mmap_munmap_good);  EXPECT_SYSZR(1, test_mmap_munmap()); break;
 		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
 		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
 		CASE_TEST(pipe);              EXPECT_SYSZR(1, test_pipe()); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
 		CASE_TEST(poll_stdout);       EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
-		CASE_TEST(poll_fault);        EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break;
+		CASE_TEST(poll_fault);        EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break;
 		CASE_TEST(prctl);             EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break;
 		CASE_TEST(read_badf);         EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break;
 		CASE_TEST(rmdir_blah);        EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break;
@@ -910,7 +930,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(select_stdout);     EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break;
 		CASE_TEST(select_fault);      EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break;
 		CASE_TEST(stat_blah);         EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break;
-		CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat((void *)1, &stat_buf), -1, EFAULT); break;
+		CASE_TEST(stat_fault);        EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
 		CASE_TEST(stat_timestamps);   EXPECT_SYSZR(1, test_stat_timestamps()); break;
 		CASE_TEST(symlink_root);      EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
 		CASE_TEST(unlink_root);       EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 7fb902099de4..9024754530b2 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -300,7 +300,7 @@ void test_openat2_flags(void)
 
 			ksft_print_msg("openat2 unexpectedly returned ");
 			if (fdpath)
-				ksft_print_msg("%d['%s'] with %X (!= %X)\n",
+				ksft_print_msg("%d['%s'] with %X (!= %llX)\n",
 					       fd, fdpath, fdflags,
 					       test->how.flags);
 			else
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 4e86f927880c..01cc37bf611c 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -62,7 +62,7 @@ static void error_report(struct error *err, const char *test_name)
 		break;
 
 	case PIDFD_PASS:
-		ksft_test_result_pass("%s test: Passed\n");
+		ksft_test_result_pass("%s test: Passed\n", test_name);
 		break;
 
 	default:
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 00a07e7c571c..c081ae91313a 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -381,13 +381,13 @@ static int test_pidfd_send_signal_syscall_support(void)
 
 static void *test_pidfd_poll_exec_thread(void *priv)
 {
-	ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+	ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
 			getpid(), syscall(SYS_gettid));
 	ksft_print_msg("Child Thread: doing exec of sleep\n");
 
 	execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL);
 
-	ksft_print_msg("Child Thread: DONE. pid %d tid %d\n",
+	ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n",
 			getpid(), syscall(SYS_gettid));
 	return NULL;
 }
@@ -427,7 +427,7 @@ static int child_poll_exec_test(void *args)
 {
 	pthread_t t1;
 
-	ksft_print_msg("Child (pidfd): starting. pid %d tid %d\n", getpid(),
+	ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(),
 			syscall(SYS_gettid));
 	pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL);
 	/*
@@ -480,10 +480,10 @@ static void test_pidfd_poll_exec(int use_waitpid)
 
 static void *test_pidfd_poll_leader_exit_thread(void *priv)
 {
-	ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+	ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
 			getpid(), syscall(SYS_gettid));
 	sleep(CHILD_THREAD_MIN_WAIT);
-	ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+	ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
 	return NULL;
 }
 
@@ -492,7 +492,7 @@ static int child_poll_leader_exit_test(void *args)
 {
 	pthread_t t1, t2;
 
-	ksft_print_msg("Child: starting. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+	ksft_print_msg("Child: starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
 	pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL);
 	pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
 
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
index ee71ce52cb6a..56198d4ca2bf 100644
--- a/tools/testing/selftests/proc/proc-empty-vm.c
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -23,6 +23,9 @@
  *	/proc/${pid}/smaps
  *	/proc/${pid}/smaps_rollup
  */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
 #undef NDEBUG
 #include <assert.h>
 #include <errno.h>
@@ -34,6 +37,7 @@
 #include <sys/mman.h>
 #include <sys/ptrace.h>
 #include <sys/resource.h>
+#include <sys/syscall.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
@@ -42,6 +46,43 @@
 #define TEST_VSYSCALL
 #endif
 
+#if defined __amd64__
+	#ifndef SYS_pkey_alloc
+		#define SYS_pkey_alloc 330
+	#endif
+	#ifndef SYS_pkey_free
+		#define SYS_pkey_free 331
+	#endif
+#elif defined __i386__
+	#ifndef SYS_pkey_alloc
+		#define SYS_pkey_alloc 381
+	#endif
+	#ifndef SYS_pkey_free
+		#define SYS_pkey_free 382
+	#endif
+#else
+	#error "SYS_pkey_alloc"
+#endif
+
+static int g_protection_key_support;
+
+static int protection_key_support(void)
+{
+	long rv = syscall(SYS_pkey_alloc, 0, 0);
+	if (rv > 0) {
+		syscall(SYS_pkey_free, (int)rv);
+		return 1;
+	} else if (rv == -1 && errno == ENOSYS) {
+		return 0;
+	} else if (rv == -1 && errno == EINVAL) {
+		// ospke=n
+		return 0;
+	} else {
+		fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno);
+		exit(EXIT_FAILURE);
+	}
+}
+
 /*
  * 0: vsyscall VMA doesn't exist	vsyscall=none
  * 1: vsyscall VMA is --xp		vsyscall=xonly
@@ -60,7 +101,7 @@ static const char proc_pid_maps_vsyscall_2[] =
 static const char proc_pid_smaps_vsyscall_0[] = "";
 
 static const char proc_pid_smaps_vsyscall_1[] =
-"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
 "Size:                  4 kB\n"
 "KernelPageSize:        4 kB\n"
 "MMUPageSize:           4 kB\n"
@@ -73,6 +114,7 @@ static const char proc_pid_smaps_vsyscall_1[] =
 "Private_Dirty:         0 kB\n"
 "Referenced:            0 kB\n"
 "Anonymous:             0 kB\n"
+"KSM:                   0 kB\n"
 "LazyFree:              0 kB\n"
 "AnonHugePages:         0 kB\n"
 "ShmemPmdMapped:        0 kB\n"
@@ -83,14 +125,10 @@ static const char proc_pid_smaps_vsyscall_1[] =
 "SwapPss:               0 kB\n"
 "Locked:                0 kB\n"
 "THPeligible:           0\n"
-/*
- * "ProtectionKey:" field is conditional. It is possible to check it as well,
- * but I don't have such machine.
- */
 ;
 
 static const char proc_pid_smaps_vsyscall_2[] =
-"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
 "Size:                  4 kB\n"
 "KernelPageSize:        4 kB\n"
 "MMUPageSize:           4 kB\n"
@@ -103,6 +141,7 @@ static const char proc_pid_smaps_vsyscall_2[] =
 "Private_Dirty:         0 kB\n"
 "Referenced:            0 kB\n"
 "Anonymous:             0 kB\n"
+"KSM:                   0 kB\n"
 "LazyFree:              0 kB\n"
 "AnonHugePages:         0 kB\n"
 "ShmemPmdMapped:        0 kB\n"
@@ -113,10 +152,6 @@ static const char proc_pid_smaps_vsyscall_2[] =
 "SwapPss:               0 kB\n"
 "Locked:                0 kB\n"
 "THPeligible:           0\n"
-/*
- * "ProtectionKey:" field is conditional. It is possible to check it as well,
- * but I'm too tired.
- */
 ;
 
 static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
@@ -238,19 +273,27 @@ static int test_proc_pid_smaps(pid_t pid)
 		}
 		perror("open /proc/${pid}/smaps");
 		return EXIT_FAILURE;
+	}
+	ssize_t rv = read(fd, buf, sizeof(buf));
+	close(fd);
+
+	assert(0 <= rv);
+	assert(rv <= sizeof(buf));
+
+	if (g_vsyscall == 0) {
+		assert(rv == 0);
 	} else {
-		ssize_t rv = read(fd, buf, sizeof(buf));
-		close(fd);
-		if (g_vsyscall == 0) {
-			assert(rv == 0);
-		} else {
-			size_t len = strlen(g_proc_pid_maps_vsyscall);
-			/* TODO "ProtectionKey:" */
-			assert(rv > len);
-			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
+		size_t len = strlen(g_proc_pid_smaps_vsyscall);
+		assert(rv > len);
+		assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0);
+
+		if (g_protection_key_support) {
+#define PROTECTION_KEY "ProtectionKey:         0\n"
+			assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY)));
 		}
-		return EXIT_SUCCESS;
 	}
+
+	return EXIT_SUCCESS;
 }
 
 static const char g_smaps_rollup[] =
@@ -303,6 +346,95 @@ static int test_proc_pid_smaps_rollup(pid_t pid)
 	}
 }
 
+static const char *parse_u64(const char *p, const char *const end, uint64_t *rv)
+{
+	*rv = 0;
+	for (; p != end; p += 1) {
+		if ('0' <= *p && *p <= '9') {
+			assert(!__builtin_mul_overflow(*rv, 10, rv));
+			assert(!__builtin_add_overflow(*rv, *p - '0', rv));
+		} else {
+			break;
+		}
+	}
+	assert(p != end);
+	return p;
+}
+
+/*
+ * There seems to be 2 types of valid output:
+ * "0 A A B 0 0 0\n" for dynamic exeuctables,
+ * "0 0 0 B 0 0 0\n" for static executables.
+ */
+static int test_proc_pid_statm(pid_t pid)
+{
+	char buf[4096];
+	snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
+	int fd = open(buf, O_RDONLY);
+	if (fd == -1) {
+		perror("open /proc/${pid}/statm");
+		return EXIT_FAILURE;
+	}
+
+	ssize_t rv = read(fd, buf, sizeof(buf));
+	close(fd);
+
+	assert(rv >= 0);
+	assert(rv <= sizeof(buf));
+	if (0) {
+		write(1, buf, rv);
+	}
+
+	const char *p = buf;
+	const char *const end = p + rv;
+
+	/* size */
+	assert(p != end && *p++ == '0');
+	assert(p != end && *p++ == ' ');
+
+	uint64_t resident;
+	p = parse_u64(p, end, &resident);
+	assert(p != end && *p++ == ' ');
+
+	uint64_t shared;
+	p = parse_u64(p, end, &shared);
+	assert(p != end && *p++ == ' ');
+
+	uint64_t text;
+	p = parse_u64(p, end, &text);
+	assert(p != end && *p++ == ' ');
+
+	assert(p != end && *p++ == '0');
+	assert(p != end && *p++ == ' ');
+
+	/* data */
+	assert(p != end && *p++ == '0');
+	assert(p != end && *p++ == ' ');
+
+	assert(p != end && *p++ == '0');
+	assert(p != end && *p++ == '\n');
+
+	assert(p == end);
+
+	/*
+	 * "text" is "mm->end_code - mm->start_code" at execve(2) time.
+	 * munmap() doesn't change it. It can be anything (just link
+	 * statically). It can't be 0 because executing to this point
+	 * implies at least 1 page of code.
+	 */
+	assert(text > 0);
+
+	/*
+	 * These two are always equal. Always 0 for statically linked
+	 * executables and sometimes 0 for dynamically linked executables.
+	 * There is no way to tell one from another without parsing ELF
+	 * which is too much for this test.
+	 */
+	assert(resident == shared);
+
+	return EXIT_SUCCESS;
+}
+
 int main(void)
 {
 	int rv = EXIT_SUCCESS;
@@ -328,6 +460,8 @@ int main(void)
 		abort();
 	}
 
+	g_protection_key_support = protection_key_support();
+
 	pid_t pid = fork();
 	if (pid == -1) {
 		perror("fork");
@@ -389,11 +523,9 @@ int main(void)
 		if (rv == EXIT_SUCCESS) {
 			rv = test_proc_pid_smaps_rollup(pid);
 		}
-		/*
-		 * TODO test /proc/${pid}/statm, task_statm()
-		 * ->start_code, ->end_code aren't updated by munmap().
-		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
-		 */
+		if (rv == EXIT_SUCCESS) {
+			rv = test_proc_pid_statm(pid);
+		}
 
 		/* Cut the rope. */
 		int wstatus;
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index b8e2ea23cb3f..6e415ddb206f 100644..100755
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -331,3 +331,32 @@ specify_qemu_net () {
 		echo $1 -net none
 	fi
 }
+
+# Extract the ftrace output from the console log output
+# The ftrace output in the original logs look like:
+# Dumping ftrace buffer:
+# ---------------------------------
+# [...]
+# ---------------------------------
+extract_ftrace_from_console() {
+	awk < "$1" '
+
+	/Dumping ftrace buffer:/ {
+		buffer_count++
+		print "Ftrace dump " buffer_count ":"
+		capture = 1
+		next
+	}
+
+	/---------------------------------/ {
+		if(capture == 1) {
+			capture = 2
+			next
+		} else if(capture == 2) {
+			capture = 0
+			print ""
+		}
+	}
+
+	capture == 2'
+}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 5be670dd4009..de65d77b47ff 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -13,7 +13,7 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
-T=/tmp/kvm-recheck.sh.$$
+T="`mktemp ${TMPDIR-/tmp}/kvm-recheck.sh.XXXXXX`"
 trap 'rm -f $T' 0 2
 
 configerrors=0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index b0f36a638a69..7af73ddc148d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -49,6 +49,7 @@ TORTURE_SHUTDOWN_GRACE=180
 TORTURE_SUITE=rcu
 TORTURE_MOD=rcutorture
 TORTURE_TRUST_MAKE=""
+debuginfo="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"
 resdir=""
 configs=""
 cpus=0
@@ -68,6 +69,7 @@ usage () {
 	echo "       --cpus N"
 	echo "       --datestamp string"
 	echo "       --defconfig string"
+	echo "       --debug-info"
 	echo "       --dryrun batches|scenarios|sched|script"
 	echo "       --duration minutes | <seconds>s | <hours>h | <days>d"
 	echo "       --gdb"
@@ -135,6 +137,15 @@ do
 		ds=$2
 		shift
 		;;
+	--debug-info|--debuginfo)
+		if test -z "$TORTURE_KCONFIG_KCSAN_ARG" && test -z "$TORTURE_BOOT_GDB_ARG"
+		then
+			TORTURE_KCONFIG_KCSAN_ARG="$debuginfo"; export TORTURE_KCONFIG_KCSAN_ARG
+			TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+		else
+			echo "Ignored redundant --debug-info (implied by --kcsan &c)"
+		fi
+		;;
 	--defconfig)
 		checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
 		TORTURE_DEFCONFIG=$2
@@ -163,7 +174,7 @@ do
 		shift
 		;;
 	--gdb)
-		TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG
+		TORTURE_KCONFIG_GDB_ARG="$debuginfo"; export TORTURE_KCONFIG_GDB_ARG
 		TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
 		TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
 		;;
@@ -179,7 +190,7 @@ do
 		shift
 		;;
 	--kasan)
-		TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+		TORTURE_KCONFIG_KASAN_ARG="$debuginfo CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
 		if test -n "$torture_qemu_mem_default"
 		then
 			TORTURE_QEMU_MEM=2G
@@ -191,7 +202,7 @@ do
 		shift
 		;;
 	--kcsan)
-		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
+		TORTURE_KCONFIG_KCSAN_ARG="$debuginfo CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
 		;;
 	--kmake-arg|--kmake-args)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9ab0f6bc172c..b07c11cf6929 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -11,7 +11,7 @@
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 
-T=${TMPDIR-/tmp}/parse-console.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-console.sh.XXXXXX`"
 file="$1"
 title="$2"
 
@@ -182,3 +182,10 @@ if ! test -s $file.diags
 then
 	rm -f $file.diags
 fi
+
+# Call extract_ftrace_from_console function, if the output is empty,
+# don't create $file.ftrace. Otherwise output the results to $file.ftrace
+extract_ftrace_from_console $file > $file.ftrace
+if [ ! -s $file.ftrace ]; then
+	rm -f $file.ftrace
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 12b50a4a881a..d5a0d8a33c27 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -472,7 +472,7 @@ do
 	if test -n "$firsttime"
 	then
 		torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
-		torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+		torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
 		mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
 		if test -f "$T/last-resdir-kasan"
 		then
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 093ea6e8e65c..9003c56cd764 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -11,3 +11,4 @@ CONFIG_FORCE_TASKS_TRACE_RCU=y
 #CHECK#CONFIG_TASKS_TRACE_RCU=y
 CONFIG_TASKS_TRACE_RCU_READ_MB=n
 CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 5073dbc96125..2deac2031de9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
 CFLAGS += $(KHDR_INCLUDES)
 
 TEST_GEN_PROGS := resctrl_tests
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index d3cbb829ff6a..bcbca356d56a 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -205,10 +205,11 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
  * cache_val:		execute benchmark and measure LLC occupancy resctrl
  * and perf cache miss for the benchmark
  * @param:		parameters passed to cache_val()
+ * @span:		buffer size for the benchmark
  *
  * Return:		0 on success. non-zero on failure.
  */
-int cat_val(struct resctrl_val_param *param)
+int cat_val(struct resctrl_val_param *param, size_t span)
 {
 	int memflush = 1, operation = 0, ret = 0;
 	char *resctrl_val = param->resctrl_val;
@@ -245,7 +246,7 @@ int cat_val(struct resctrl_val_param *param)
 		if (ret)
 			break;
 
-		if (run_fill_buf(param->span, memflush, operation, true)) {
+		if (run_fill_buf(span, memflush, operation, true)) {
 			fprintf(stderr, "Error-running fill buffer\n");
 			ret = -1;
 			goto pe_close;
@@ -294,7 +295,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
 	ret = platform && abs((int)diff_percent) > max_diff_percent &&
 	      (cmt ? (abs(avg_diff) > max_diff) : true);
 
-	ksft_print_msg("%s Check cache miss rate within %d%%\n",
+	ksft_print_msg("%s Check cache miss rate within %lu%%\n",
 		       ret ? "Fail:" : "Pass:", max_diff_percent);
 
 	ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 3848dfb46aba..224ba8544d8a 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -41,7 +41,7 @@ static int cat_setup(struct resctrl_val_param *p)
 	return ret;
 }
 
-static int check_results(struct resctrl_val_param *param)
+static int check_results(struct resctrl_val_param *param, size_t span)
 {
 	char *token_array[8], temp[512];
 	unsigned long sum_llc_perf_miss = 0;
@@ -76,7 +76,7 @@ static int check_results(struct resctrl_val_param *param)
 	fclose(fp);
 	no_of_bits = count_bits(param->mask);
 
-	return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
+	return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64,
 			       MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
 			       get_vendor() == ARCH_INTEL, false);
 }
@@ -96,6 +96,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 	char cbm_mask[256];
 	int count_of_bits;
 	char pipe_message;
+	size_t span;
 
 	/* Get default cbm mask for L3/L2 cache */
 	ret = get_cbm_mask(cache_type, cbm_mask);
@@ -140,7 +141,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 	/* Set param values for parent thread which will be allocated bitmask
 	 * with (max_bits - n) bits
 	 */
-	param.span = cache_size * (count_of_bits - n) / count_of_bits;
+	span = cache_size * (count_of_bits - n) / count_of_bits;
 	strcpy(param.ctrlgrp, "c2");
 	strcpy(param.mongrp, "m2");
 	strcpy(param.filename, RESULT_FILE_NAME2);
@@ -162,23 +163,17 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 		param.mask = l_mask_1;
 		strcpy(param.ctrlgrp, "c1");
 		strcpy(param.mongrp, "m1");
-		param.span = cache_size * n / count_of_bits;
+		span = cache_size * n / count_of_bits;
 		strcpy(param.filename, RESULT_FILE_NAME1);
 		param.num_of_runs = 0;
 		param.cpu_no = sibling_cpu_no;
-	} else {
-		ret = signal_handler_register();
-		if (ret) {
-			kill(bm_pid, SIGKILL);
-			goto out;
-		}
 	}
 
 	remove(param.filename);
 
-	ret = cat_val(&param);
+	ret = cat_val(&param, span);
 	if (ret == 0)
-		ret = check_results(&param);
+		ret = check_results(&param, span);
 
 	if (bm_pid == 0) {
 		/* Tell parent that child is ready */
@@ -208,10 +203,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
 		}
 		close(pipefd[0]);
 		kill(bm_pid, SIGKILL);
-		signal_handler_unregister();
 	}
 
-out:
 	cat_test_cleanup();
 
 	return ret;
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index cb2197647c6c..50bdbce9fba9 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -27,7 +27,7 @@ static int cmt_setup(struct resctrl_val_param *p)
 	return 0;
 }
 
-static int check_results(struct resctrl_val_param *param, int no_of_bits)
+static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits)
 {
 	char *token_array[8], temp[512];
 	unsigned long sum_llc_occu_resc = 0;
@@ -58,7 +58,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
 	}
 	fclose(fp);
 
-	return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span,
+	return show_cache_info(sum_llc_occu_resc, no_of_bits, span,
 			       MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
 			       true, true);
 }
@@ -68,16 +68,17 @@ void cmt_test_cleanup(void)
 	remove(RESULT_FILE_NAME);
 }
 
-int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd)
 {
+	const char * const *cmd = benchmark_cmd;
+	const char *new_cmd[BENCHMARK_ARGS];
 	unsigned long cache_size = 0;
 	unsigned long long_mask;
+	char *span_str = NULL;
 	char cbm_mask[256];
 	int count_of_bits;
-	int ret;
-
-	if (!validate_resctrl_feature_request(CMT_STR))
-		return -1;
+	size_t span;
+	int ret, i;
 
 	ret = get_cbm_mask("L3", cbm_mask);
 	if (ret)
@@ -105,24 +106,36 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
 		.cpu_no		= cpu_no,
 		.filename	= RESULT_FILE_NAME,
 		.mask		= ~(long_mask << n) & long_mask,
-		.span		= cache_size * n / count_of_bits,
 		.num_of_runs	= 0,
 		.setup		= cmt_setup,
 	};
 
-	if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
-		sprintf(benchmark_cmd[1], "%zu", param.span);
+	span = cache_size * n / count_of_bits;
+
+	if (strcmp(cmd[0], "fill_buf") == 0) {
+		/* Duplicate the command to be able to replace span in it */
+		for (i = 0; benchmark_cmd[i]; i++)
+			new_cmd[i] = benchmark_cmd[i];
+		new_cmd[i] = NULL;
+
+		ret = asprintf(&span_str, "%zu", span);
+		if (ret < 0)
+			return -1;
+		new_cmd[1] = span_str;
+		cmd = new_cmd;
+	}
 
 	remove(RESULT_FILE_NAME);
 
-	ret = resctrl_val(benchmark_cmd, &param);
+	ret = resctrl_val(cmd, &param);
 	if (ret)
 		goto out;
 
-	ret = check_results(&param, n);
+	ret = check_results(&param, span, n);
 
 out:
 	cmt_test_cleanup();
+	free(span_str);
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 4d2f145804b8..d3bf4368341e 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -12,7 +12,7 @@
 
 #define RESULT_FILE_NAME	"result_mba"
 #define NUM_OF_RUNS		5
-#define MAX_DIFF_PERCENT	5
+#define MAX_DIFF_PERCENT	8
 #define ALLOCATION_MAX		100
 #define ALLOCATION_MIN		10
 #define ALLOCATION_STEP		10
@@ -141,7 +141,7 @@ void mba_test_cleanup(void)
 	remove(RESULT_FILE_NAME);
 }
 
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
+int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd)
 {
 	struct resctrl_val_param param = {
 		.resctrl_val	= MBA_STR,
@@ -149,7 +149,7 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
 		.mongrp		= "m1",
 		.cpu_no		= cpu_no,
 		.filename	= RESULT_FILE_NAME,
-		.bw_report	= bw_report,
+		.bw_report	= "reads",
 		.setup		= mba_setup
 	};
 	int ret;
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index c7de6f5977f6..741533f2b075 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -11,7 +11,7 @@
 #include "resctrl.h"
 
 #define RESULT_FILE_NAME	"result_mbm"
-#define MAX_DIFF_PERCENT	5
+#define MAX_DIFF_PERCENT	8
 #define NUM_OF_RUNS		5
 
 static int
@@ -95,7 +95,7 @@ static int mbm_setup(struct resctrl_val_param *p)
 		return END_OF_TESTS;
 
 	/* Set up shemata with 100% allocation on the first run. */
-	if (p->num_of_runs == 0)
+	if (p->num_of_runs == 0 && validate_resctrl_feature_request("MB", NULL))
 		ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
 				     p->resctrl_val);
 
@@ -109,16 +109,15 @@ void mbm_test_cleanup(void)
 	remove(RESULT_FILE_NAME);
 }
 
-int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd)
+int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd)
 {
 	struct resctrl_val_param param = {
 		.resctrl_val	= MBM_STR,
 		.ctrlgrp	= "c1",
 		.mongrp		= "m1",
-		.span		= span,
 		.cpu_no		= cpu_no,
 		.filename	= RESULT_FILE_NAME,
-		.bw_report	=  bw_report,
+		.bw_report	= "reads",
 		.setup		= mbm_setup
 	};
 	int ret;
@@ -129,7 +128,7 @@ int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd
 	if (ret)
 		goto out;
 
-	ret = check_results(span);
+	ret = check_results(DEFAULT_SPAN);
 
 out:
 	mbm_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 838d1a438f33..a33f414f6019 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#define _GNU_SOURCE
 #ifndef RESCTRL_H
 #define RESCTRL_H
 #include <stdio.h>
@@ -28,16 +27,16 @@
 #define RESCTRL_PATH		"/sys/fs/resctrl"
 #define PHYS_ID_PATH		"/sys/devices/system/cpu/cpu"
 #define INFO_PATH		"/sys/fs/resctrl/info"
-#define L3_PATH			"/sys/fs/resctrl/info/L3"
-#define MB_PATH			"/sys/fs/resctrl/info/MB"
-#define L3_MON_PATH		"/sys/fs/resctrl/info/L3_MON"
-#define L3_MON_FEATURES_PATH	"/sys/fs/resctrl/info/L3_MON/mon_features"
 
 #define ARCH_INTEL     1
 #define ARCH_AMD       2
 
 #define END_OF_TESTS	1
 
+#define BENCHMARK_ARGS		64
+
+#define DEFAULT_SPAN		(250 * MB)
+
 #define PARENT_EXIT(err_msg)			\
 	do {					\
 		perror(err_msg);		\
@@ -52,7 +51,6 @@
  * @ctrlgrp:		Name of the control monitor group (con_mon grp)
  * @mongrp:		Name of the monitor group (mon grp)
  * @cpu_no:		CPU number to which the benchmark would be binded
- * @span:		Memory bytes accessed in each benchmark iteration
  * @filename:		Name of file to which the o/p should be written
  * @bw_report:		Bandwidth report type (reads vs writes)
  * @setup:		Call back function to setup test environment
@@ -62,7 +60,6 @@ struct resctrl_val_param {
 	char		ctrlgrp[64];
 	char		mongrp[64];
 	int		cpu_no;
-	size_t		span;
 	char		filename[64];
 	char		*bw_report;
 	unsigned long	mask;
@@ -86,10 +83,9 @@ int get_resource_id(int cpu_no, int *resource_id);
 int mount_resctrlfs(void);
 int umount_resctrlfs(void);
 int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(const char *resctrl_val);
+bool validate_resctrl_feature_request(const char *resource, const char *feature);
 char *fgrep(FILE *inf, const char *str);
 int taskset_benchmark(pid_t bm_pid, int cpu_no);
-void run_benchmark(int signum, siginfo_t *info, void *ucontext);
 int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
 		   char *resctrl_val);
 int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
@@ -97,21 +93,21 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
 int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
 		    int group_fd, unsigned long flags);
 int run_fill_buf(size_t span, int memflush, int op, bool once);
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
-int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd);
+int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param);
+int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd);
 void tests_cleanup(void);
 void mbm_test_cleanup(void);
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
+int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd);
 void mba_test_cleanup(void);
 int get_cbm_mask(char *cache_type, char *cbm_mask);
 int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
 void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
 int signal_handler_register(void);
 void signal_handler_unregister(void);
-int cat_val(struct resctrl_val_param *param);
+int cat_val(struct resctrl_val_param *param, size_t span);
 void cat_test_cleanup(void);
 int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd);
 unsigned int count_bits(unsigned long n);
 void cmt_test_cleanup(void);
 int get_core_sibling(int cpu_no);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index d511daeb6851..2bbe3045a018 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -10,9 +10,6 @@
  */
 #include "resctrl.h"
 
-#define BENCHMARK_ARGS		64
-#define BENCHMARK_ARG_SIZE	64
-
 static int detect_vendor(void)
 {
 	FILE *inf = fopen("/proc/cpuinfo", "r");
@@ -52,8 +49,8 @@ int get_vendor(void)
 
 static void cmd_help(void)
 {
-	printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
-	printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n");
+	printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n");
+	printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n");
 	printf("\t   default benchmark is builtin fill_buf\n");
 	printf("\t-t test list: run tests specified in the test list, ");
 	printf("e.g. -t mbm,mba,cmt,cat\n");
@@ -70,72 +67,98 @@ void tests_cleanup(void)
 	cat_test_cleanup();
 }
 
-static void run_mbm_test(char **benchmark_cmd, size_t span,
-			 int cpu_no, char *bw_report)
+static int test_prepare(void)
 {
 	int res;
 
-	ksft_print_msg("Starting MBM BW change ...\n");
+	res = signal_handler_register();
+	if (res) {
+		ksft_print_msg("Failed to register signal handler\n");
+		return res;
+	}
 
 	res = mount_resctrlfs();
 	if (res) {
-		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+		signal_handler_unregister();
+		ksft_print_msg("Failed to mount resctrl FS\n");
+		return res;
+	}
+	return 0;
+}
+
+static void test_cleanup(void)
+{
+	umount_resctrlfs();
+	signal_handler_unregister();
+}
+
+static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
+{
+	int res;
+
+	ksft_print_msg("Starting MBM BW change ...\n");
+
+	if (test_prepare()) {
+		ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
 		return;
 	}
 
-	if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) {
+	if (!validate_resctrl_feature_request("L3_MON", "mbm_total_bytes") ||
+	    !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
+	    (get_vendor() != ARCH_INTEL)) {
 		ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
-		goto umount;
+		goto cleanup;
 	}
 
-	res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+	res = mbm_bw_change(cpu_no, benchmark_cmd);
 	ksft_test_result(!res, "MBM: bw change\n");
 	if ((get_vendor() == ARCH_INTEL) && res)
 		ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
 
-umount:
-	umount_resctrlfs();
+cleanup:
+	test_cleanup();
 }
 
-static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report)
+static void run_mba_test(const char * const *benchmark_cmd, int cpu_no)
 {
 	int res;
 
 	ksft_print_msg("Starting MBA Schemata change ...\n");
 
-	res = mount_resctrlfs();
-	if (res) {
-		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+	if (test_prepare()) {
+		ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
 		return;
 	}
 
-	if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) {
+	if (!validate_resctrl_feature_request("MB", NULL) ||
+	    !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
+	    (get_vendor() != ARCH_INTEL)) {
 		ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
-		goto umount;
+		goto cleanup;
 	}
 
-	res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+	res = mba_schemata_change(cpu_no, benchmark_cmd);
 	ksft_test_result(!res, "MBA: schemata change\n");
 
-umount:
-	umount_resctrlfs();
+cleanup:
+	test_cleanup();
 }
 
-static void run_cmt_test(char **benchmark_cmd, int cpu_no)
+static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
 {
 	int res;
 
 	ksft_print_msg("Starting CMT test ...\n");
 
-	res = mount_resctrlfs();
-	if (res) {
-		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+	if (test_prepare()) {
+		ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
 		return;
 	}
 
-	if (!validate_resctrl_feature_request(CMT_STR)) {
+	if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") ||
+	    !validate_resctrl_feature_request("L3", NULL)) {
 		ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
-		goto umount;
+		goto cleanup;
 	}
 
 	res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
@@ -143,8 +166,8 @@ static void run_cmt_test(char **benchmark_cmd, int cpu_no)
 	if ((get_vendor() == ARCH_INTEL) && res)
 		ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
 
-umount:
-	umount_resctrlfs();
+cleanup:
+	test_cleanup();
 }
 
 static void run_cat_test(int cpu_no, int no_of_bits)
@@ -153,48 +176,53 @@ static void run_cat_test(int cpu_no, int no_of_bits)
 
 	ksft_print_msg("Starting CAT test ...\n");
 
-	res = mount_resctrlfs();
-	if (res) {
-		ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+	if (test_prepare()) {
+		ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
 		return;
 	}
 
-	if (!validate_resctrl_feature_request(CAT_STR)) {
+	if (!validate_resctrl_feature_request("L3", NULL)) {
 		ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
-		goto umount;
+		goto cleanup;
 	}
 
 	res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
 	ksft_test_result(!res, "CAT: test\n");
 
-umount:
-	umount_resctrlfs();
+cleanup:
+	test_cleanup();
 }
 
 int main(int argc, char **argv)
 {
-	bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true;
-	char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
-	char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
-	int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0;
-	int ben_ind, ben_count, tests = 0;
-	size_t span = 250 * MB;
+	bool mbm_test = true, mba_test = true, cmt_test = true;
+	const char *benchmark_cmd[BENCHMARK_ARGS] = {};
+	int c, cpu_no = 1, i, no_of_bits = 0;
+	char *span_str = NULL;
 	bool cat_test = true;
+	int tests = 0;
+	int ret;
 
-	for (i = 0; i < argc; i++) {
-		if (strcmp(argv[i], "-b") == 0) {
-			ben_ind = i + 1;
-			ben_count = argc - ben_ind;
-			argc_new = ben_ind - 1;
-			has_ben = true;
-			break;
-		}
-	}
-
-	while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) {
+	while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) {
 		char *token;
 
 		switch (c) {
+		case 'b':
+			/*
+			 * First move optind back to the (first) optarg and
+			 * then build the benchmark command using the
+			 * remaining arguments.
+			 */
+			optind--;
+			if (argc - optind >= BENCHMARK_ARGS)
+				ksft_exit_fail_msg("Too long benchmark command");
+
+			/* Extract benchmark command from command line. */
+			for (i = 0; i < argc - optind; i++)
+				benchmark_cmd[i] = argv[i + optind];
+			benchmark_cmd[i] = NULL;
+
+			goto last_arg;
 		case 't':
 			token = strtok(optarg, ",");
 
@@ -243,6 +271,7 @@ int main(int argc, char **argv)
 			return -1;
 		}
 	}
+last_arg:
 
 	ksft_print_header();
 
@@ -254,29 +283,6 @@ int main(int argc, char **argv)
 	if (geteuid() != 0)
 		return ksft_exit_skip("Not running as root. Skipping...\n");
 
-	if (has_ben) {
-		/* Extract benchmark command from command line. */
-		for (i = ben_ind; i < argc; i++) {
-			benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i];
-			sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]);
-		}
-		benchmark_cmd[ben_count] = NULL;
-	} else {
-		/* If no benchmark is given by "-b" argument, use fill_buf. */
-		for (i = 0; i < 5; i++)
-			benchmark_cmd[i] = benchmark_cmd_area[i];
-
-		strcpy(benchmark_cmd[0], "fill_buf");
-		sprintf(benchmark_cmd[1], "%zu", span);
-		strcpy(benchmark_cmd[2], "1");
-		strcpy(benchmark_cmd[3], "0");
-		strcpy(benchmark_cmd[4], "false");
-		benchmark_cmd[5] = NULL;
-	}
-
-	sprintf(bw_report, "reads");
-	sprintf(bm_type, "fill_buf");
-
 	if (!check_resctrlfs_support())
 		return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
 
@@ -285,13 +291,26 @@ int main(int argc, char **argv)
 
 	filter_dmesg();
 
+	if (!benchmark_cmd[0]) {
+		/* If no benchmark is given by "-b" argument, use fill_buf. */
+		benchmark_cmd[0] = "fill_buf";
+		ret = asprintf(&span_str, "%u", DEFAULT_SPAN);
+		if (ret < 0)
+			ksft_exit_fail_msg("Out of memory!\n");
+		benchmark_cmd[1] = span_str;
+		benchmark_cmd[2] = "1";
+		benchmark_cmd[3] = "0";
+		benchmark_cmd[4] = "false";
+		benchmark_cmd[5] = NULL;
+	}
+
 	ksft_set_plan(tests ? : 4);
 
 	if (mbm_test)
-		run_mbm_test(benchmark_cmd, span, cpu_no, bw_report);
+		run_mbm_test(benchmark_cmd, cpu_no);
 
 	if (mba_test)
-		run_mba_test(benchmark_cmd, cpu_no, bw_report);
+		run_mba_test(benchmark_cmd, cpu_no);
 
 	if (cmt_test)
 		run_cmt_test(benchmark_cmd, cpu_no);
@@ -299,5 +318,6 @@ int main(int argc, char **argv)
 	if (cat_test)
 		run_cat_test(cpu_no, no_of_bits);
 
+	free(span_str);
 	ksft_finished();
 }
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index f0f6c5f6e98b..88789678917b 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -468,7 +468,9 @@ pid_t bm_pid, ppid;
 
 void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
 {
-	kill(bm_pid, SIGKILL);
+	/* Only kill child after bm_pid is set after fork() */
+	if (bm_pid)
+		kill(bm_pid, SIGKILL);
 	umount_resctrlfs();
 	tests_cleanup();
 	ksft_print_msg("Ending\n\n");
@@ -482,9 +484,11 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
  */
 int signal_handler_register(void)
 {
-	struct sigaction sigact;
+	struct sigaction sigact = {};
 	int ret = 0;
 
+	bm_pid = 0;
+
 	sigact.sa_sigaction = ctrlc_handler;
 	sigemptyset(&sigact.sa_mask);
 	sigact.sa_flags = SA_SIGINFO;
@@ -504,7 +508,7 @@ int signal_handler_register(void)
  */
 void signal_handler_unregister(void)
 {
-	struct sigaction sigact;
+	struct sigaction sigact = {};
 
 	sigact.sa_handler = SIG_DFL;
 	sigemptyset(&sigact.sa_mask);
@@ -622,6 +626,56 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
 }
 
 /*
+ * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
+ *		   in specified signal. Direct benchmark stdio to /dev/null.
+ * @signum:	signal number
+ * @info:	signal info
+ * @ucontext:	user context in signal handling
+ */
+static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+{
+	int operation, ret, memflush;
+	char **benchmark_cmd;
+	size_t span;
+	bool once;
+	FILE *fp;
+
+	benchmark_cmd = info->si_ptr;
+
+	/*
+	 * Direct stdio of child to /dev/null, so that only parent writes to
+	 * stdio (console)
+	 */
+	fp = freopen("/dev/null", "w", stdout);
+	if (!fp)
+		PARENT_EXIT("Unable to direct benchmark status to /dev/null");
+
+	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
+		/* Execute default fill_buf benchmark */
+		span = strtoul(benchmark_cmd[1], NULL, 10);
+		memflush =  atoi(benchmark_cmd[2]);
+		operation = atoi(benchmark_cmd[3]);
+		if (!strcmp(benchmark_cmd[4], "true"))
+			once = true;
+		else if (!strcmp(benchmark_cmd[4], "false"))
+			once = false;
+		else
+			PARENT_EXIT("Invalid once parameter");
+
+		if (run_fill_buf(span, memflush, operation, once))
+			fprintf(stderr, "Error in running fill buffer\n");
+	} else {
+		/* Execute specified benchmark */
+		ret = execvp(benchmark_cmd[0], benchmark_cmd);
+		if (ret)
+			perror("wrong\n");
+	}
+
+	fclose(stdout);
+	PARENT_EXIT("Unable to run specified benchmark");
+}
+
+/*
  * resctrl_val:	execute benchmark and measure memory bandwidth on
  *			the benchmark
  * @benchmark_cmd:	benchmark command and its arguments
@@ -629,7 +683,7 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
  *
  * Return:		0 on success. non-zero on failure.
  */
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
 {
 	char *resctrl_val = param->resctrl_val;
 	unsigned long bw_resc_start = 0;
@@ -706,28 +760,30 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 
 	ksft_print_msg("Benchmark PID: %d\n", bm_pid);
 
-	ret = signal_handler_register();
-	if (ret)
-		goto out;
-
-	value.sival_ptr = benchmark_cmd;
+	/*
+	 * The cast removes constness but nothing mutates benchmark_cmd within
+	 * the context of this process. At the receiving process, it becomes
+	 * argv, which is mutable, on exec() but that's after fork() so it
+	 * doesn't matter for the process running the tests.
+	 */
+	value.sival_ptr = (void *)benchmark_cmd;
 
 	/* Taskset benchmark to specified cpu */
 	ret = taskset_benchmark(bm_pid, param->cpu_no);
 	if (ret)
-		goto unregister;
+		goto out;
 
 	/* Write benchmark to specified control&monitoring grp in resctrl FS */
 	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
 				      resctrl_val);
 	if (ret)
-		goto unregister;
+		goto out;
 
 	if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
 	    !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
 		ret = initialize_mem_bw_imc();
 		if (ret)
-			goto unregister;
+			goto out;
 
 		initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
 					  param->cpu_no, resctrl_val);
@@ -742,7 +798,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 		    sizeof(pipe_message)) {
 			perror("# failed reading message from child process");
 			close(pipefd[0]);
-			goto unregister;
+			goto out;
 		}
 	}
 	close(pipefd[0]);
@@ -751,7 +807,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 	if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
 		perror("# sigqueue SIGUSR1 to child");
 		ret = errno;
-		goto unregister;
+		goto out;
 	}
 
 	/* Give benchmark enough time to fully run */
@@ -780,8 +836,6 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
 		}
 	}
 
-unregister:
-	signal_handler_unregister();
 out:
 	kill(bm_pid, SIGKILL);
 
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index bd36ee206602..5ebd43683876 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -8,6 +8,9 @@
  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
  *    Fenghua Yu <fenghua.yu@intel.com>
  */
+#include <fcntl.h>
+#include <limits.h>
+
 #include "resctrl.h"
 
 static int find_resctrl_mount(char *buffer)
@@ -292,58 +295,6 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
 }
 
 /*
- * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
- *		   in specified signal. Direct benchmark stdio to /dev/null.
- * @signum:	signal number
- * @info:	signal info
- * @ucontext:	user context in signal handling
- *
- * Return: void
- */
-void run_benchmark(int signum, siginfo_t *info, void *ucontext)
-{
-	int operation, ret, memflush;
-	char **benchmark_cmd;
-	size_t span;
-	bool once;
-	FILE *fp;
-
-	benchmark_cmd = info->si_ptr;
-
-	/*
-	 * Direct stdio of child to /dev/null, so that only parent writes to
-	 * stdio (console)
-	 */
-	fp = freopen("/dev/null", "w", stdout);
-	if (!fp)
-		PARENT_EXIT("Unable to direct benchmark status to /dev/null");
-
-	if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
-		/* Execute default fill_buf benchmark */
-		span = strtoul(benchmark_cmd[1], NULL, 10);
-		memflush =  atoi(benchmark_cmd[2]);
-		operation = atoi(benchmark_cmd[3]);
-		if (!strcmp(benchmark_cmd[4], "true"))
-			once = true;
-		else if (!strcmp(benchmark_cmd[4], "false"))
-			once = false;
-		else
-			PARENT_EXIT("Invalid once parameter");
-
-		if (run_fill_buf(span, memflush, operation, once))
-			fprintf(stderr, "Error in running fill buffer\n");
-	} else {
-		/* Execute specified benchmark */
-		ret = execvp(benchmark_cmd[0], benchmark_cmd);
-		if (ret)
-			perror("wrong\n");
-	}
-
-	fclose(stdout);
-	PARENT_EXIT("Unable to run specified benchmark");
-}
-
-/*
  * create_grp - Create a group only if one doesn't exist
  * @grp_name:	Name of the group
  * @grp:	Full path and name of the group
@@ -488,9 +439,8 @@ out:
  */
 int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
 {
-	char controlgroup[1024], schema[1024], reason[64];
-	int resource_id, ret = 0;
-	FILE *fp;
+	char controlgroup[1024], reason[128], schema[1024] = {};
+	int resource_id, fd, schema_len = -1, ret = 0;
 
 	if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
 	    strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) &&
@@ -518,28 +468,39 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
 
 	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
 	    !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
-		sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
+		schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
+				      "L3:", resource_id, '=', schemata);
 	if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
 	    !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
-		sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
-
-	fp = fopen(controlgroup, "w");
-	if (!fp) {
-		sprintf(reason, "Failed to open control group");
+		schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
+				      "MB:", resource_id, '=', schemata);
+	if (schema_len < 0 || schema_len >= sizeof(schema)) {
+		snprintf(reason, sizeof(reason),
+			 "snprintf() failed with return value : %d", schema_len);
 		ret = -1;
-
 		goto out;
 	}
 
-	if (fprintf(fp, "%s\n", schema) < 0) {
-		sprintf(reason, "Failed to write schemata in control group");
-		fclose(fp);
+	fd = open(controlgroup, O_WRONLY);
+	if (fd < 0) {
+		snprintf(reason, sizeof(reason),
+			 "open() failed : %s", strerror(errno));
 		ret = -1;
 
-		goto out;
+		goto err_schema_not_empty;
 	}
-	fclose(fp);
+	if (write(fd, schema, schema_len) < 0) {
+		snprintf(reason, sizeof(reason),
+			 "write() failed : %s", strerror(errno));
+		close(fd);
+		ret = -1;
+
+		goto err_schema_not_empty;
+	}
+	close(fd);
 
+err_schema_not_empty:
+	schema[schema_len - 1] = 0;
 out:
 	ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
 		       schema, ret ? " # " : "",
@@ -604,63 +565,46 @@ char *fgrep(FILE *inf, const char *str)
 
 /*
  * validate_resctrl_feature_request - Check if requested feature is valid.
- * @resctrl_val:	Requested feature
+ * @resource:	Required resource (e.g., MB, L3, L2, L3_MON, etc.)
+ * @feature:	Required monitor feature (in mon_features file). Can only be
+ *		set for L3_MON. Must be NULL for all other resources.
  *
- * Return: True if the feature is supported, else false. False is also
- *         returned if resctrl FS is not mounted.
+ * Return: True if the resource/feature is supported, else false. False is
+ *         also returned if resctrl FS is not mounted.
  */
-bool validate_resctrl_feature_request(const char *resctrl_val)
+bool validate_resctrl_feature_request(const char *resource, const char *feature)
 {
+	char res_path[PATH_MAX];
 	struct stat statbuf;
-	bool found = false;
 	char *res;
 	FILE *inf;
 	int ret;
 
-	if (!resctrl_val)
+	if (!resource)
 		return false;
 
 	ret = find_resctrl_mount(NULL);
 	if (ret)
 		return false;
 
-	if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
-		if (!stat(L3_PATH, &statbuf))
-			return true;
-	} else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
-		if (!stat(MB_PATH, &statbuf))
-			return true;
-	} else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
-		   !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
-		if (!stat(L3_MON_PATH, &statbuf)) {
-			inf = fopen(L3_MON_FEATURES_PATH, "r");
-			if (!inf)
-				return false;
-
-			if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
-				res = fgrep(inf, "llc_occupancy");
-				if (res) {
-					found = true;
-					free(res);
-				}
-			}
-
-			if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
-				res = fgrep(inf, "mbm_total_bytes");
-				if (res) {
-					free(res);
-					res = fgrep(inf, "mbm_local_bytes");
-					if (res) {
-						found = true;
-						free(res);
-					}
-				}
-			}
-			fclose(inf);
-		}
-	}
+	snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource);
+
+	if (stat(res_path, &statbuf))
+		return false;
+
+	if (!feature)
+		return true;
+
+	snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource);
+	inf = fopen(res_path, "r");
+	if (!inf)
+		return false;
+
+	res = fgrep(inf, feature);
+	free(res);
+	fclose(inf);
 
-	return found;
+	return !!res;
 }
 
 int filter_dmesg(void)
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index ebdbb3c22e54..f224b84591fb 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -2,9 +2,14 @@
 # Copyright (C) 2021 ARM Limited
 # Originally tools/testing/arm64/abi/Makefile
 
-TEST_GEN_PROGS := hwprobe
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo
 
 include ../../lib.mk
 
 $(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
-	$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..50a2cc8aef38
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+	unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+	uint32_t insn = *(uint32_t *)regs[0];
+
+	assert(insn == MK_CBO(regs[11]));
+
+	illegal_insn = true;
+	regs[0] += 4;
+}
+
+static void cbo_insn(char *base, int fn)
+{
+	uint32_t insn = MK_CBO(fn);
+
+	asm volatile(
+	"mv	a0, %0\n"
+	"li	a1, %1\n"
+	".4byte	%2\n"
+	: : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
+}
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base)  { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+	ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+	illegal_insn = false;
+	cbo_clean(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+	illegal_insn = false;
+	cbo_flush(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+	illegal_insn = false;
+	cbo_inval(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+	ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+	illegal_insn = false;
+	cbo_zero(&mem[0]);
+	ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+	return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+	};
+	cpu_set_t *cpus = (cpu_set_t *)arg;
+	__u64 block_size;
+	int i, j;
+	long rc;
+
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+	block_size = pair.value;
+	ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+			 is_power_of_2(block_size), "Zicboz block size\n");
+	ksft_print_msg("Zicboz block size: %ld\n", block_size);
+
+	illegal_insn = false;
+	cbo_zero(&mem[block_size]);
+	ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+	if (illegal_insn || !is_power_of_2(block_size)) {
+		ksft_test_result_skip("cbo.zero check\n");
+		return;
+	}
+
+	assert(block_size <= 1024);
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		if (i % 2)
+			cbo_zero(&mem[i * block_size]);
+	}
+
+	for (i = 0; i < 4096 / block_size; ++i) {
+		char expected = i % 2 ? 0x0 : 0xa5;
+
+		for (j = 0; j < block_size; ++j) {
+			if (mem[i * block_size + j] != expected) {
+				ksft_test_result_fail("cbo.zero check\n");
+				ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+					       i * block_size + j, expected);
+				return;
+			}
+		}
+	}
+
+	ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+	struct riscv_hwprobe pair = {
+		.key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+	};
+	cpu_set_t one_cpu;
+	int i = 0, c = 0;
+	long rc;
+
+	while (i++ < CPU_COUNT(cpus)) {
+		while (!CPU_ISSET(c, cpus))
+			++c;
+
+		CPU_ZERO(&one_cpu);
+		CPU_SET(c, &one_cpu);
+
+		rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+		assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+		if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+			ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+					   "Use taskset to select a set of harts where Zicboz\n"
+					   "presence (present or not) is consistent for each hart\n");
+		++c;
+	}
+}
+
+enum {
+	TEST_ZICBOZ,
+	TEST_NO_ZICBOZ,
+	TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+	bool enabled;
+	unsigned int nr_tests;
+	void (*test_fn)(void *arg);
+} tests[] = {
+	[TEST_ZICBOZ]		= { .nr_tests = 3, test_zicboz },
+	[TEST_NO_ZICBOZ]	= { .nr_tests = 1, test_no_zicboz },
+	[TEST_NO_ZICBOM]	= { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_sigaction = &sigill_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	struct riscv_hwprobe pair;
+	unsigned int plan = 0;
+	cpu_set_t cpus;
+	long rc;
+	int i;
+
+	if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+		rc = sigaction(SIGILL, &act, NULL);
+		assert(rc == 0);
+		tests[TEST_NO_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOM].enabled = true;
+	}
+
+	rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+	assert(rc == 0);
+
+	ksft_print_header();
+
+	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+	rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+	if (rc < 0)
+		ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+	assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+	if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+		tests[TEST_ZICBOZ].enabled = true;
+		tests[TEST_NO_ZICBOZ].enabled = false;
+	} else {
+		check_no_zicboz_cpus(&cpus);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i)
+		plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+	if (plan == 0)
+		ksft_print_msg("No tests enabled.\n");
+	else
+		ksft_set_plan(plan);
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (tests[i].enabled)
+			tests[i].test_fn(&cpus);
+	}
+
+	ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index 09f290a67420..c474891df307 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,14 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include <stddef.h>
-#include <asm/hwprobe.h>
-
-/*
- * Rather than relying on having a new enough libc to define this, just do it
- * ourselves.  This way we don't need to be coupled to a new-enough libc to
- * contain the call.
- */
-long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
-		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+#include "hwprobe.h"
+#include "../../kselftest.h"
 
 int main(int argc, char **argv)
 {
@@ -16,6 +8,9 @@ int main(int argc, char **argv)
 	unsigned long cpus;
 	long out;
 
+	ksft_print_header();
+	ksft_set_plan(5);
+
 	/* Fake the CPU_SET ops. */
 	cpus = -1;
 
@@ -25,13 +20,16 @@ int main(int argc, char **argv)
 	 */
 	for (long i = 0; i < 8; i++)
 		pairs[i].key = i;
+
 	out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
 	if (out != 0)
-		return -1;
+		ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
 	for (long i = 0; i < 4; ++i) {
 		/* Fail if the kernel claims not to recognize a base key. */
 		if ((i < 4) && (pairs[i].key != i))
-			return -2;
+			ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+					   "key=%ld, i=%ld\n", pairs[i].key, i);
 
 		if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
 			continue;
@@ -39,52 +37,30 @@ int main(int argc, char **argv)
 		if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
 			continue;
 
-		return -3;
+		ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
 	}
 
-	/*
-	 * This should also work with a NULL CPU set, but should not work
-	 * with an improperly supplied CPU set.
-	 */
 	out = riscv_hwprobe(pairs, 8, 0, 0, 0);
-	if (out != 0)
-		return -4;
+	ksft_test_result(out == 0, "NULL CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
-	if (out == 0)
-		return -5;
+	ksft_test_result(out != 0, "Bad CPU set\n");
 
 	out = riscv_hwprobe(pairs, 8, 1, 0, 0);
-	if (out == 0)
-		return -6;
+	ksft_test_result(out != 0, "NULL CPU set with non-zero count\n");
 
-	/*
-	 * Check that keys work by providing one that we know exists, and
-	 * checking to make sure the resultig pair is what we asked for.
-	 */
 	pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
 	out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
-	if (out != 0)
-		return -7;
-	if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
-		return -8;
+	ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+			 "Existing key is maintained\n");
 
-	/*
-	 * Check that an unknown key gets overwritten with -1,
-	 * but doesn't block elements after it.
-	 */
 	pairs[0].key = 0x5555;
 	pairs[1].key = 1;
 	pairs[1].value = 0xAAAA;
 	out = riscv_hwprobe(pairs, 2, 0, 0, 0);
-	if (out != 0)
-		return -9;
-
-	if (pairs[0].key != -1)
-		return -10;
-
-	if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
-		return -11;
+	ksft_test_result(out == 0 && pairs[0].key == -1 &&
+			 pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+			 "Unknown key overwritten with -1 and doesn't block other elements\n");
 
-	return 0;
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..721b0ce73a56
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves.  This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+		   size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
index 11e0f0568923..c333263f2b27 100644
--- a/tools/testing/selftests/riscv/mm/Makefile
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -5,11 +5,11 @@
 # Additional include paths needed by kselftest.h and local headers
 CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
 
-TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup
+TEST_GEN_FILES := mmap_default mmap_bottomup
 
-TEST_PROGS := testcases/run_mmap.sh
+TEST_PROGS := run_mmap.sh
 
 include ../../lib.mk
 
-$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h
+$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h
 	$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index b29379f7e478..1757d19ca89b 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <sys/mman.h>
-#include <testcases/mmap_test.h>
+#include <mmap_test.h>
 
 #include "../../kselftest_harness.h"
 
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index d1accb91b726..c63c60b9397e 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <sys/mman.h>
-#include <testcases/mmap_test.h>
+#include <mmap_test.h>
 
 #include "../../kselftest_harness.h"
 
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 9b8434f62f57..9b8434f62f57 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh
index ca5ad7c48bad..ca5ad7c48bad 100755
--- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh
+++ b/tools/testing/selftests/riscv/mm/run_mmap.sh
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index bf951a490bb4..20403d58345c 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1231,7 +1231,7 @@ void *test_membarrier_worker_thread(void *arg)
 	}
 
 	/* Wait for initialization. */
-	while (!atomic_load(&args->percpu_list_ptr)) {}
+	while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
 
 	for (i = 0; i < iters; ++i) {
 		int ret;
@@ -1299,22 +1299,22 @@ void *test_membarrier_manager_thread(void *arg)
 	test_membarrier_init_percpu_list(&list_a);
 	test_membarrier_init_percpu_list(&list_b);
 
-	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+	__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
 
-	while (!atomic_load(&args->stop)) {
+	while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
 		/* list_a is "active". */
 		cpu_a = rand() % CPU_SETSIZE;
 		/*
 		 * As list_b is "inactive", we should never see changes
 		 * to list_b.
 		 */
-		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+		if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
 			fprintf(stderr, "Membarrier test failed\n");
 			abort();
 		}
 
 		/* Make list_b "active". */
-		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
 		if (rseq_membarrier_expedited(cpu_a) &&
 				errno != ENXIO /* missing CPU */) {
 			perror("sys_membarrier");
@@ -1324,27 +1324,27 @@ void *test_membarrier_manager_thread(void *arg)
 		 * Cpu A should now only modify list_b, so the values
 		 * in list_a should be stable.
 		 */
-		expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+		expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
 
 		cpu_b = rand() % CPU_SETSIZE;
 		/*
 		 * As list_a is "inactive", we should never see changes
 		 * to list_a.
 		 */
-		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+		if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
 			fprintf(stderr, "Membarrier test failed\n");
 			abort();
 		}
 
 		/* Make list_a "active". */
-		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+		__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
 		if (rseq_membarrier_expedited(cpu_b) &&
 				errno != ENXIO /* missing CPU*/) {
 			perror("sys_membarrier");
 			abort();
 		}
 		/* Remember a value from list_b. */
-		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+		expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
 	}
 
 	test_membarrier_free_percpu_list(&list_a);
@@ -1401,7 +1401,7 @@ void test_membarrier(void)
 		}
 	}
 
-	atomic_store(&thread_args.stop, 1);
+	__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
 	ret = pthread_join(manager_thread, NULL);
 	if (ret) {
 		errno = ret;
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 98d37cb744fb..07227fab1cc9 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -111,7 +111,7 @@ int main(void)
 
 	/* Make sure more than the required minimum. */
 	stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
-	ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
+	ksft_print_msg("[NOTE]\tthe stack size is %u\n", stack_size);
 
 	ksft_print_header();
 	ksft_set_plan(3);
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index fc9f8cde7d42..3b0f17b81ac2 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -6,18 +6,18 @@
 ksft_skip=4
 
 if ! /sbin/modprobe -q -n test_static_key_base; then
-	echo "static_key: module test_static_key_base is not found [SKIP]"
+	echo "static_keys: module test_static_key_base is not found [SKIP]"
 	exit $ksft_skip
 fi
 
 if ! /sbin/modprobe -q -n test_static_keys; then
-	echo "static_key: module test_static_keys is not found [SKIP]"
+	echo "static_keys: module test_static_keys is not found [SKIP]"
 	exit $ksft_skip
 fi
 
 if /sbin/modprobe -q test_static_key_base; then
 	if /sbin/modprobe -q test_static_keys; then
-		echo "static_key: ok"
+		echo "static_keys: ok"
 		/sbin/modprobe -q -r test_static_keys
 		/sbin/modprobe -q -r test_static_key_base
 	else
@@ -25,6 +25,6 @@ if /sbin/modprobe -q test_static_key_base; then
 		/sbin/modprobe -q -r test_static_key_base
 	fi
 else
-	echo "static_key: [FAIL]"
+	echo "static_keys: [FAIL]"
 	exit 1
 fi
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
index 3c4b7fa05075..e8b3dde4fa16 100644
--- a/tools/testing/selftests/tc-testing/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -1,31 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-include ../../../scripts/Makefile.include
 
-top_srcdir = $(abspath ../../../..)
-APIDIR := $(top_scrdir)/include/uapi
-TEST_GEN_FILES = action.o
+TEST_PROGS += ./tdc.sh
+TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts
 
 include ../lib.mk
-
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-ifeq ($(PROBE),)
-  CPU ?= probe
-else
-  CPU ?= generic
-endif
-
-CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
-	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
-
-CLANG_FLAGS = -I. -I$(APIDIR) \
-	      $(CLANG_SYS_INCLUDES) \
-	      -Wno-compare-distinct-pointer-types
-
-$(OUTPUT)/%.o: %.c
-	$(CLANG) $(CLANG_FLAGS) \
-		 -O2 --target=bpf -emit-llvm -c $< -o - |      \
-	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-
-TEST_PROGS += ./tdc.sh
-TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index be7b00799b3e..fc8e858ff119 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -195,8 +195,6 @@ directory:
       and the other is a test whether the command leaked memory or not.
       (This one is a preliminary version, it may not work quite right yet,
       but the overall template is there and it should only need tweaks.)
-  - buildebpfPlugin.py:
-      builds all programs in $EBPFDIR.
 
 
 ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf
new file mode 100644
index 000000000000..4879479b2ee5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/action-ebpf
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 5aa8705751f0..012aa33b341b 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -1,12 +1,21 @@
 #
+# Network
+#
+
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+
+#
 # Core Netfilter Configuration
 #
+CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CONNTRACK_LABELS=y
 CONFIG_NF_CONNTRACK_PROCFS=y
 CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
 CONFIG_NF_NAT=m
 CONFIG_NETFILTER_XT_TARGET_LOG=m
 
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
deleted file mode 100644
index d34fe06268d2..000000000000
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ /dev/null
@@ -1,67 +0,0 @@
-'''
-build ebpf program
-'''
-
-import os
-import signal
-from string import Template
-import subprocess
-import time
-from TdcPlugin import TdcPlugin
-from tdc_config import *
-
-class SubPlugin(TdcPlugin):
-    def __init__(self):
-        self.sub_class = 'buildebpf/SubPlugin'
-        self.tap = ''
-        super().__init__()
-
-    def pre_suite(self, testcount, testidlist):
-        super().pre_suite(testcount, testidlist)
-
-        if self.args.buildebpf:
-            self._ebpf_makeall()
-
-    def post_suite(self, index):
-        super().post_suite(index)
-
-        self._ebpf_makeclean()
-
-    def add_args(self, parser):
-        super().add_args(parser)
-
-        self.argparser_group = self.argparser.add_argument_group(
-            'buildebpf',
-            'options for buildebpfPlugin')
-        self.argparser_group.add_argument(
-            '--nobuildebpf', action='store_false', default=True,
-            dest='buildebpf',
-            help='Don\'t build eBPF programs')
-
-        return self.argparser
-
-    def _ebpf_makeall(self):
-        if self.args.buildebpf:
-            self._make('all')
-
-    def _ebpf_makeclean(self):
-        if self.args.buildebpf:
-            self._make('clean')
-
-    def _make(self, target):
-        command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
-        proc = subprocess.Popen(command,
-            shell=True,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            env=os.environ.copy())
-        (rawout, serr) = proc.communicate()
-
-        if proc.returncode != 0 and len(serr) > 0:
-            foutput = serr.decode("utf-8")
-        else:
-            foutput = rawout.decode("utf-8")
-
-        proc.stdout.close()
-        proc.stderr.close()
-        return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index b62429b0fcdb..bb19b8b76d3b 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin
 
 from tdc_config import *
 
-def prepare_suite(obj, test):
-    original = obj.args.NAMES
-
-    if 'skip' in test and test['skip'] == 'yes':
-        return
-
-    if 'nsPlugin' not in test['plugins']:
-        return
-
-    shadow = {}
-    shadow['IP'] = original['IP']
-    shadow['TC'] = original['TC']
-    shadow['NS'] = '{}-{}'.format(original['NS'], test['random'])
-    shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id'])
-    shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id'])
-    shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id'])
-    shadow['DEV2'] = original['DEV2']
-    obj.args.NAMES = shadow
-
-    if obj.args.namespace:
-        obj._ns_create()
-    else:
-        obj._ports_create()
-
-    # Make sure the netns is visible in the fs
-    while True:
-        obj._proc_check()
-        try:
-            ns = obj.args.NAMES['NS']
-            f = open('/run/netns/{}'.format(ns))
-            f.close()
-            break
-        except:
-            time.sleep(0.1)
-            continue
-
-    obj.args.NAMES = original
+try:
+    from pyroute2 import netns
+    from pyroute2 import IPRoute
+    netlink = True
+except ImportError:
+    netlink = False
+    print("!!! Consider installing pyroute2 !!!")
 
 class SubPlugin(TdcPlugin):
     def __init__(self):
@@ -53,64 +23,71 @@ class SubPlugin(TdcPlugin):
         super().__init__()
 
     def pre_suite(self, testcount, testlist):
-        from itertools import cycle
-
         super().pre_suite(testcount, testlist)
 
-        print("Setting up namespaces and devices...")
+    def prepare_test(self, test):
+        if 'skip' in test and test['skip'] == 'yes':
+            return
 
-        with Pool(self.args.mp) as p:
-            it = zip(cycle([self]), testlist)
-            p.starmap(prepare_suite, it)
+        if 'nsPlugin' not in test['plugins']:
+            return
 
-    def pre_case(self, caseinfo, test_skip):
+        if netlink == True:
+            self._nl_ns_create()
+        else:
+            self._ipr2_ns_create()
+
+        # Make sure the netns is visible in the fs
+        ticks = 20
+        while True:
+            if ticks == 0:
+                raise TimeoutError
+            self._proc_check()
+            try:
+                ns = self.args.NAMES['NS']
+                f = open('/run/netns/{}'.format(ns))
+                f.close()
+                break
+            except:
+                time.sleep(0.1)
+                ticks -= 1
+                continue
+
+    def pre_case(self, test, test_skip):
         if self.args.verbose:
             print('{}.pre_case'.format(self.sub_class))
 
         if test_skip:
             return
 
+        self.prepare_test(test)
 
     def post_case(self):
         if self.args.verbose:
             print('{}.post_case'.format(self.sub_class))
 
-        if self.args.namespace:
-            self._ns_destroy()
+        if netlink == True:
+            self._nl_ns_destroy()
         else:
-            self._ports_destroy()
+            self._ipr2_ns_destroy()
 
     def post_suite(self, index):
         if self.args.verbose:
             print('{}.post_suite'.format(self.sub_class))
 
         # Make sure we don't leak resources
-        for f in os.listdir('/run/netns/'):
-            cmd = self._replace_keywords("$IP netns del {}".format(f))
+        cmd = self._replace_keywords("$IP -a netns del")
 
-            if self.args.verbose > 3:
-                print('_exec_cmd:  command "{}"'.format(cmd))
-
-            subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        if self.args.verbose > 3:
+            print('_exec_cmd:  command "{}"'.format(cmd))
 
-    def add_args(self, parser):
-        super().add_args(parser)
-        self.argparser_group = self.argparser.add_argument_group(
-            'netns',
-            'options for nsPlugin(run commands in net namespace)')
-        self.argparser_group.add_argument(
-            '-N', '--no-namespace', action='store_false', default=True,
-            dest='namespace', help='Don\'t run commands in namespace')
-        return self.argparser
+        subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
     def adjust_command(self, stage, command):
         super().adjust_command(stage, command)
         cmdform = 'list'
         cmdlist = list()
 
-        if not self.args.namespace:
-            return command
-
         if self.args.verbose:
             print('{}.adjust_command'.format(self.sub_class))
 
@@ -138,63 +115,90 @@ class SubPlugin(TdcPlugin):
             print('adjust_command:  return command [{}]'.format(command))
         return command
 
-    def _ports_create_cmds(self):
-        cmds = []
-
-        cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1'))
-        cmds.append(self._replace_keywords('link set $DEV0 up'))
-        cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
-        if not self.args.namespace:
-            cmds.append(self._replace_keywords('link set $DEV1 up'))
-
-        return cmds
-
-    def _ports_create(self):
-        self._exec_cmd_batched('pre', self._ports_create_cmds())
+    def _nl_ns_create(self):
+        ns = self.args.NAMES["NS"];
+        dev0 = self.args.NAMES["DEV0"];
+        dev1 = self.args.NAMES["DEV1"];
+        dummy = self.args.NAMES["DUMMY"];
 
-    def _ports_destroy_cmd(self):
-        return self._replace_keywords('link del $DEV0')
-
-    def _ports_destroy(self):
-        self._exec_cmd('post', self._ports_destroy_cmd())
-
-    def _ns_create_cmds(self):
+        if self.args.verbose:
+            print('{}._nl_ns_create'.format(self.sub_class))
+
+        netns.create(ns)
+        netns.pushns(newns=ns)
+        with IPRoute() as ip:
+            ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+            ip.link('add', ifname=dummy, kind='dummy')
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev1_idx = ip.link_lookup(ifname=dev1)[0]
+                    dummy_idx = ip.link_lookup(ifname=dummy)[0]
+                    ip.link('set', index=dev1_idx, state='up')
+                    ip.link('set', index=dummy_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue
+        netns.popns()
+
+        with IPRoute() as ip:
+            ticks = 20
+            while True:
+                if ticks == 0:
+                    raise TimeoutError
+                try:
+                    dev0_idx = ip.link_lookup(ifname=dev0)[0]
+                    ip.link('set', index=dev0_idx, state='up')
+                    break
+                except:
+                    time.sleep(0.1)
+                    ticks -= 1
+                    continue
+
+    def _ipr2_ns_create_cmds(self):
         cmds = []
 
-        if self.args.namespace:
-            ns = self.args.NAMES['NS']
+        ns = self.args.NAMES['NS']
 
-            cmds.append(self._replace_keywords('netns add {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
-            cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+        cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+        cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+        cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns)))
 
-            if self.args.device:
-                cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
-                cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+        if self.args.device:
+            cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+            cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
 
         return cmds
 
-    def _ns_create(self):
+    def _ipr2_ns_create(self):
         '''
         Create the network namespace in which the tests will be run and set up
         the required network devices for it.
         '''
-        self._ports_create()
-        self._exec_cmd_batched('pre', self._ns_create_cmds())
+        self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds())
+
+    def _nl_ns_destroy(self):
+        ns = self.args.NAMES['NS']
+        netns.remove(ns)
 
-    def _ns_destroy_cmd(self):
+    def _ipr2_ns_destroy_cmd(self):
         return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
 
-    def _ns_destroy(self):
+    def _ipr2_ns_destroy(self):
         '''
         Destroy the network namespace for testing (and any associated network
         devices as well)
         '''
-        if self.args.namespace:
-            self._exec_cmd('post', self._ns_destroy_cmd())
-            self._ports_destroy()
+        self._exec_cmd('post', self._ipr2_ns_destroy_cmd())
 
     @cached_property
     def _proc(self):
diff --git a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
index f5335e8ad6b4..f5335e8ad6b4 100755
--- a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh
+++ b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 91832400ddbd..6e00bf32ef9a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -54,9 +54,6 @@
             "actions",
             "bpf"
         ],
-        "plugins": {
-                "requires": "buildebpfPlugin"
-        },
         "setup": [
             [
                 "$TC action flush action bpf",
@@ -65,10 +62,10 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
             "$TC action flush action bpf"
@@ -81,9 +78,6 @@
             "actions",
             "bpf"
         ],
-        "plugins": {
-                "requires": "buildebpfPlugin"
-        },
         "setup": [
             [
                 "$TC action flush action bpf",
@@ -92,10 +86,10 @@
                 255
             ]
         ],
-        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667",
         "expExitCode": "255",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref",
         "matchCount": "0",
         "teardown": [
             [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
index 013fb983bc3f..725d406a30ac 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
@@ -52,17 +52,16 @@
         ],
         "plugins": {
             "requires": [
-               "buildebpfPlugin",
                "nsPlugin"
             ]
         },
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok",
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok",
         "expExitCode": "0",
         "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
-        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
         "matchCount": "1",
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
@@ -77,17 +76,16 @@
         ],
         "plugins": {
             "requires": [
-               "buildebpfPlugin",
                "nsPlugin"
             ]
         },
         "setup": [
             "$TC qdisc add dev $DEV1 ingress"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko",
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko",
         "expExitCode": "1",
         "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
-        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
         "matchCount": "0",
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index ddc7c355be0a..24bd0c2a3014 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -272,5 +272,62 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 parent root drr"
         ]
+    },
+    {
+        "id": "bd32",
+        "name": "Try to delete hashtable referenced by another u32 filter",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
+    },
+    {
+        "id": "4585",
+        "name": "Delete small tree of u32 hashtables and filters",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+            "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1",
+        "matchPattern": "protocol ip pref 2 u32",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 parent root drr"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
index 0ddb8e1b4369..c98c339424d4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
@@ -9,8 +9,7 @@
         "plugins": {
             "requires": "nsPlugin"
         },
-        "setup": [
-        ],
+        "setup": [],
         "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
         "expExitCode": "0",
         "verifyCmd": "$TC qdisc show dev $DUMMY",
@@ -126,8 +125,7 @@
         "verifyCmd": "$TC qdisc show dev $DUMMY",
         "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
         "matchCount": "0",
-        "teardown": [
-        ]
+        "teardown": []
     },
     {
         "id": "8436",
@@ -139,8 +137,7 @@
         "plugins": {
             "requires": "nsPlugin"
         },
-        "setup": [
-        ],
+        "setup": [],
         "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
         "expExitCode": "0",
         "verifyCmd": "$TC class show dev $DUMMY",
@@ -149,5 +146,28 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY handle 1: root"
         ]
+    },
+    {
+        "id": "bef4",
+        "name": "HFSC rt inner class upgrade to sc",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 8"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1:1 classid 1:2 hfsc rt rate 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 8bit.*rt m1 0bit d 0us m2 8bit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 0599635c4bc6..2d603ef2e375 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -170,11 +170,11 @@
         "setup": [
             "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
             "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI",
-            "./taprio_wait_for_admin.sh $TC $ETH"
+            "./scripts/taprio_wait_for_admin.sh $TC $ETH"
         ],
         "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI",
         "expExitCode": "2",
-        "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+        "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
         "matchPattern": "0",
         "matchCount": "1",
         "teardown": [
@@ -195,11 +195,11 @@
         "setup": [
             "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
             "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2",
-            "./taprio_wait_for_admin.sh $TC $ETH"
+            "./scripts/taprio_wait_for_admin.sh $TC $ETH"
         ],
         "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2",
         "expExitCode": "2",
-        "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+        "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
         "matchPattern": "0",
         "matchCount": "1",
         "teardown": [
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a6718192aff3..caeacc691587 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist):
         pm.call_post_suite(1)
         return emergency_exit_message
 
-    if args.verbose:
-        print('give test rig 2 seconds to stabilize')
-
-    time.sleep(2)
-
 def purge_run(pm, index):
     pm.call_post_suite(index)
 
@@ -616,7 +611,7 @@ def test_runner_mp(pm, args, alltests):
     batches.insert(0, serial)
 
     print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
-    print("Using {} batches".format(len(batches)))
+    print("Using {} batches and {} workers".format(len(batches), args.mp))
 
     # We can't pickle these objects so workaround them
     global mp_pm
@@ -1017,12 +1012,17 @@ def main():
     parser = pm.call_add_args(parser)
     (args, remaining) = parser.parse_known_args()
     args.NAMES = NAMES
+    args.mp = min(args.mp, 4)
     pm.set_args(args)
     check_default_settings(args, remaining, pm)
     if args.verbose > 2:
         print('args is {}'.format(args))
 
-    set_operation_mode(pm, parser, args, remaining)
+    try:
+        set_operation_mode(pm, parser, args, remaining)
+    except KeyboardInterrupt:
+        # Cleanup on Ctrl-C
+        pm.call_post_suite(None)
 
 if __name__ == "__main__":
     main()
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index eb357bd7923c..407fa53822a0 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,7 +1,68 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
-modprobe netdevsim
-modprobe sch_teql
-./tdc.py -c actions --nobuildebpf
-./tdc.py -c qdisc
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+   modprobe -q -R "$1"
+   if [ $? -ne 0 ]; then
+      echo "Module $1 not found... skipping."
+   else
+      modprobe "$1"
+   fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_ipt
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc` -c actions
+./tdc.py -J`nproc` -c qdisc
diff --git a/tools/testing/selftests/tdx/.gitignore b/tools/testing/selftests/tdx/.gitignore
new file mode 100644
index 000000000000..5db4d15cc673
--- /dev/null
+++ b/tools/testing/selftests/tdx/.gitignore
@@ -0,0 +1 @@
+tdx_guest_test
diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile
new file mode 100644
index 000000000000..9b88e57dbba5
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := power_floor_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
new file mode 100644
index 000000000000..0326b39a11b9
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define POWER_FLOOR_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_enable"
+#define POWER_FLOOR_STATUS_ATTRIBUTE  "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_status"
+
+void power_floor_exit(int signum)
+{
+	int fd;
+
+	/* Disable feature via sysfs knob */
+
+	fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+	if (fd < 0) {
+		perror("Unable to open power floor enable file\n");
+		exit(1);
+	}
+
+	if (write(fd, "0\n", 2) < 0) {
+		perror("Can' disable power floor notifications\n");
+		exit(1);
+	}
+
+	printf("Disabled power floor notifications\n");
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	struct pollfd ufd;
+	char status_str[3];
+	int fd, ret;
+
+	if (signal(SIGINT, power_floor_exit) == SIG_IGN)
+		signal(SIGINT, SIG_IGN);
+	if (signal(SIGHUP, power_floor_exit) == SIG_IGN)
+		signal(SIGHUP, SIG_IGN);
+	if (signal(SIGTERM, power_floor_exit) == SIG_IGN)
+		signal(SIGTERM, SIG_IGN);
+
+	/* Enable feature via sysfs knob */
+	fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+	if (fd < 0) {
+		perror("Unable to open power floor enable file\n");
+		exit(1);
+	}
+
+	if (write(fd, "1\n", 2) < 0) {
+		perror("Can' enable power floor notifications\n");
+		exit(1);
+	}
+
+	close(fd);
+
+	printf("Enabled power floor notifications\n");
+
+	while (1) {
+		fd = open(POWER_FLOOR_STATUS_ATTRIBUTE, O_RDONLY);
+		if (fd < 0) {
+			perror("Unable to power floor status file\n");
+			exit(1);
+		}
+
+		if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+			fprintf(stderr, "Failed to set pointer to beginning\n");
+			exit(1);
+		}
+
+		if (read(fd, status_str, sizeof(status_str)) < 0) {
+			fprintf(stderr, "Failed to read from:%s\n",
+			POWER_FLOOR_STATUS_ATTRIBUTE);
+			exit(1);
+		}
+
+		ufd.fd = fd;
+		ufd.events = POLLPRI;
+
+		ret = poll(&ufd, 1, -1);
+		if (ret < 0) {
+			perror("poll error");
+			exit(1);
+		} else if (ret == 0) {
+			printf("Poll Timeout\n");
+		} else {
+			if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+				fprintf(stderr, "Failed to set pointer to beginning\n");
+				exit(1);
+			}
+
+			if (read(fd, status_str, sizeof(status_str)) < 0)
+				exit(0);
+
+			printf("power floor status: %s\n", status_str);
+		}
+
+		close(fd);
+	}
+}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
new file mode 100644
index 000000000000..37ff3286283b
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := workload_hint_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
new file mode 100644
index 000000000000..217c3a641c53
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms"
+#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable"
+#define WORKLOAD_TYPE_INDEX_ATTRIBUTE  "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index"
+
+static const char * const workload_types[] = {
+	"idle",
+	"battery_life",
+	"sustained",
+	"bursty",
+	NULL
+};
+
+#define WORKLOAD_TYPE_MAX_INDEX	3
+
+void workload_hint_exit(int signum)
+{
+	int fd;
+
+	/* Disable feature via sysfs knob */
+
+	fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+	if (fd < 0) {
+		perror("Unable to open workload type feature enable file\n");
+		exit(1);
+	}
+
+	if (write(fd, "0\n", 2) < 0) {
+		perror("Can' disable workload hints\n");
+		exit(1);
+	}
+
+	printf("Disabled workload type prediction\n");
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	struct pollfd ufd;
+	char index_str[4];
+	int fd, ret, index;
+	char delay_str[64];
+	int delay = 0;
+
+	printf("Usage: workload_hint_test [notification delay in milli seconds]\n");
+
+	if (argc > 1) {
+		ret = sscanf(argv[1], "%d", &delay);
+		if (ret < 0) {
+			printf("Invalid delay\n");
+			exit(1);
+		}
+
+		printf("Setting notification delay to %d ms\n", delay);
+		if (delay < 0)
+			exit(1);
+
+		sprintf(delay_str, "%s\n", argv[1]);
+
+		sprintf(delay_str, "%s\n", argv[1]);
+		fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
+		if (fd < 0) {
+			perror("Unable to open workload notification delay\n");
+			exit(1);
+		}
+
+		if (write(fd, delay_str, strlen(delay_str)) < 0) {
+			perror("Can't set delay\n");
+			exit(1);
+		}
+
+		close(fd);
+	}
+
+	if (signal(SIGINT, workload_hint_exit) == SIG_IGN)
+		signal(SIGINT, SIG_IGN);
+	if (signal(SIGHUP, workload_hint_exit) == SIG_IGN)
+		signal(SIGHUP, SIG_IGN);
+	if (signal(SIGTERM, workload_hint_exit) == SIG_IGN)
+		signal(SIGTERM, SIG_IGN);
+
+	/* Enable feature via sysfs knob */
+	fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+	if (fd < 0) {
+		perror("Unable to open workload type feature enable file\n");
+		exit(1);
+	}
+
+	if (write(fd, "1\n", 2) < 0) {
+		perror("Can' enable workload hints\n");
+		exit(1);
+	}
+
+	close(fd);
+
+	printf("Enabled workload type prediction\n");
+
+	while (1) {
+		fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY);
+		if (fd < 0) {
+			perror("Unable to open workload type file\n");
+			exit(1);
+		}
+
+		if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+			fprintf(stderr, "Failed to set pointer to beginning\n");
+			exit(1);
+		}
+
+		if (read(fd, index_str, sizeof(index_str)) < 0) {
+			fprintf(stderr, "Failed to read from:%s\n",
+			WORKLOAD_TYPE_INDEX_ATTRIBUTE);
+			exit(1);
+		}
+
+		ufd.fd = fd;
+		ufd.events = POLLPRI;
+
+		ret = poll(&ufd, 1, -1);
+		if (ret < 0) {
+			perror("poll error");
+			exit(1);
+		} else if (ret == 0) {
+			printf("Poll Timeout\n");
+		} else {
+			if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+				fprintf(stderr, "Failed to set pointer to beginning\n");
+				exit(1);
+			}
+
+			if (read(fd, index_str, sizeof(index_str)) < 0)
+				exit(0);
+
+			ret = sscanf(index_str, "%d", &index);
+			if (ret < 0)
+				break;
+			if (index > WORKLOAD_TYPE_MAX_INDEX)
+				printf("Invalid workload type index\n");
+			else
+				printf("workload type:%s\n", workload_types[index]);
+		}
+
+		close(fd);
+	}
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index eb3e79ed7b4a..edb5acacf214 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -118,7 +118,7 @@ int nanosleep_lat_test(int clockid, long long ns)
 	clock_gettime(clockid, &end);
 
 	if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
-		printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+		ksft_print_msg("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
 		return -1;
 	}
 
@@ -132,20 +132,23 @@ int nanosleep_lat_test(int clockid, long long ns)
 	}
 
 	if (latency/count > UNRESONABLE_LATENCY) {
-		printf("Large abs latency: %lld ns :", latency/count);
+		ksft_print_msg("Large abs latency: %lld ns :", latency/count);
 		return -1;
 	}
 
 	return 0;
 }
 
-
+#define SKIPPED_CLOCK_COUNT 3
 
 int main(int argc, char **argv)
 {
 	long long length;
 	int clockid, ret;
 
+	ksft_print_header();
+	ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT);
+
 	for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
 
 		/* Skip cputime clockids since nanosleep won't increment cputime */
@@ -154,9 +157,6 @@ int main(int argc, char **argv)
 				clockid == CLOCK_HWSPECIFIC)
 			continue;
 
-		printf("nsleep latency %-26s ", clockstring(clockid));
-		fflush(stdout);
-
 		length = 10;
 		while (length <= (NSEC_PER_SEC * 10)) {
 			ret = nanosleep_lat_test(clockid, length);
@@ -167,14 +167,12 @@ int main(int argc, char **argv)
 		}
 
 		if (ret == UNSUPPORTED) {
-			printf("[UNSUPPORTED]\n");
-			continue;
-		}
-		if (ret < 0) {
-			printf("[FAILED]\n");
-			return ksft_exit_fail();
+			ksft_test_result_skip("%s\n", clockstring(clockid));
+		} else {
+			ksft_test_result(ret >= 0, "%s\n",
+					 clockstring(clockid));
 		}
-		printf("[OK]\n");
 	}
-	return ksft_exit_pass();
+
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 8a17c0e8d82b..d49dd3ffd0d9 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -76,22 +76,21 @@ static int check_diff(struct timeval start, struct timeval end)
 
 static int check_itimer(int which)
 {
+	const char *name;
 	int err;
 	struct timeval start, end;
 	struct itimerval val = {
 		.it_value.tv_sec = DELAY,
 	};
 
-	printf("Check itimer ");
-
 	if (which == ITIMER_VIRTUAL)
-		printf("virtual... ");
+		name = "ITIMER_VIRTUAL";
 	else if (which == ITIMER_PROF)
-		printf("prof... ");
+		name = "ITIMER_PROF";
 	else if (which == ITIMER_REAL)
-		printf("real... ");
-
-	fflush(stdout);
+		name = "ITIMER_REAL";
+	else
+		return -1;
 
 	done = 0;
 
@@ -104,13 +103,13 @@ static int check_itimer(int which)
 
 	err = gettimeofday(&start, NULL);
 	if (err < 0) {
-		perror("Can't call gettimeofday()\n");
+		ksft_perror("Can't call gettimeofday()");
 		return -1;
 	}
 
 	err = setitimer(which, &val, NULL);
 	if (err < 0) {
-		perror("Can't set timer\n");
+		ksft_perror("Can't set timer");
 		return -1;
 	}
 
@@ -123,20 +122,18 @@ static int check_itimer(int which)
 
 	err = gettimeofday(&end, NULL);
 	if (err < 0) {
-		perror("Can't call gettimeofday()\n");
+		ksft_perror("Can't call gettimeofday()");
 		return -1;
 	}
 
-	if (!check_diff(start, end))
-		printf("[OK]\n");
-	else
-		printf("[FAIL]\n");
+	ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
 
 	return 0;
 }
 
 static int check_timer_create(int which)
 {
+	const char *type;
 	int err;
 	timer_t id;
 	struct timeval start, end;
@@ -144,31 +141,32 @@ static int check_timer_create(int which)
 		.it_value.tv_sec = DELAY,
 	};
 
-	printf("Check timer_create() ");
 	if (which == CLOCK_THREAD_CPUTIME_ID) {
-		printf("per thread... ");
+		type = "thread";
 	} else if (which == CLOCK_PROCESS_CPUTIME_ID) {
-		printf("per process... ");
+		type = "process";
+	} else {
+		ksft_print_msg("Unknown timer_create() type %d\n", which);
+		return -1;
 	}
-	fflush(stdout);
 
 	done = 0;
 	err = timer_create(which, NULL, &id);
 	if (err < 0) {
-		perror("Can't create timer\n");
+		ksft_perror("Can't create timer");
 		return -1;
 	}
 	signal(SIGALRM, sig_handler);
 
 	err = gettimeofday(&start, NULL);
 	if (err < 0) {
-		perror("Can't call gettimeofday()\n");
+		ksft_perror("Can't call gettimeofday()");
 		return -1;
 	}
 
 	err = timer_settime(id, 0, &val, NULL);
 	if (err < 0) {
-		perror("Can't set timer\n");
+		ksft_perror("Can't set timer");
 		return -1;
 	}
 
@@ -176,14 +174,12 @@ static int check_timer_create(int which)
 
 	err = gettimeofday(&end, NULL);
 	if (err < 0) {
-		perror("Can't call gettimeofday()\n");
+		ksft_perror("Can't call gettimeofday()");
 		return -1;
 	}
 
-	if (!check_diff(start, end))
-		printf("[OK]\n");
-	else
-		printf("[FAIL]\n");
+	ksft_test_result(check_diff(start, end) == 0,
+			 "timer_create() per %s\n", type);
 
 	return 0;
 }
@@ -220,25 +216,25 @@ static int check_timer_distribution(void)
 		.it_interval.tv_nsec = 1000 * 1000,
 	};
 
-	printf("Check timer_create() per process signal distribution... ");
-	fflush(stdout);
-
 	remain = nthreads + 1;  /* worker threads + this thread */
 	signal(SIGALRM, distribution_handler);
 	err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
 	if (err < 0) {
-		perror("Can't create timer\n");
+		ksft_perror("Can't create timer");
 		return -1;
 	}
 	err = timer_settime(id, 0, &val, NULL);
 	if (err < 0) {
-		perror("Can't set timer\n");
+		ksft_perror("Can't set timer");
 		return -1;
 	}
 
 	for (i = 0; i < nthreads; i++) {
-		if (pthread_create(&threads[i], NULL, distribution_thread, NULL)) {
-			perror("Can't create thread\n");
+		err = pthread_create(&threads[i], NULL, distribution_thread,
+				     NULL);
+		if (err) {
+			ksft_print_msg("Can't create thread: %s (%d)\n",
+				       strerror(errno), errno);
 			return -1;
 		}
 	}
@@ -247,25 +243,30 @@ static int check_timer_distribution(void)
 	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
 
 	for (i = 0; i < nthreads; i++) {
-		if (pthread_join(threads[i], NULL)) {
-			perror("Can't join thread\n");
+		err = pthread_join(threads[i], NULL);
+		if (err) {
+			ksft_print_msg("Can't join thread: %s (%d)\n",
+				       strerror(errno), errno);
 			return -1;
 		}
 	}
 
 	if (timer_delete(id)) {
-		perror("Can't delete timer\n");
+		ksft_perror("Can't delete timer");
 		return -1;
 	}
 
-	printf("[OK]\n");
+	ksft_test_result_pass("check_timer_distribution\n");
 	return 0;
 }
 
 int main(int argc, char **argv)
 {
-	printf("Testing posix timers. False negative may happen on CPU execution \n");
-	printf("based timers if other threads run on the CPU...\n");
+	ksft_print_header();
+	ksft_set_plan(6);
+
+	ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
+	ksft_print_msg("based timers if other threads run on the CPU...\n");
 
 	if (check_itimer(ITIMER_VIRTUAL) < 0)
 		return ksft_exit_fail();
@@ -294,5 +295,5 @@ int main(int argc, char **argv)
 	if (check_timer_distribution() < 0)
 		return ksft_exit_fail();
 
-	return ksft_exit_pass();
+	ksft_finished();
 }
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index 5cebfb356345..dbe55f3a66f4 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -78,7 +78,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
 {
 	int sk_fd, ret;
 	socklen_t sk_addr_len;
-	int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+	int rcv_buf_sz = __UEVENT_BUFFER_SIZE;
 	uint64_t sync_add = 1;
 	struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
 	char buf[__UEVENT_BUFFER_SIZE] = { 0 };
@@ -121,6 +121,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
 
 	if ((size_t)sk_addr_len != sizeof(sk_addr)) {
 		fprintf(stderr, "Invalid socket address size\n");
+		ret = -1;
 		goto on_error;
 	}
 
@@ -147,11 +148,12 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
 	ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
 	close(sync_fd);
 	if (ret != sizeof(sync_add)) {
+		ret = -1;
 		fprintf(stderr, "Failed to synchronize with parent process\n");
 		goto on_error;
 	}
 
-	fret = 0;
+	ret = 0;
 	for (;;) {
 		ssize_t r;
 
@@ -187,7 +189,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
 on_error:
 	close(sk_fd);
 
-	return fret;
+	return ret;
 }
 
 int trigger_uevent(unsigned int times)
diff --git a/tools/testing/selftests/user_events/.gitignore b/tools/testing/selftests/user_events/.gitignore
new file mode 100644
index 000000000000..f570febd211b
--- /dev/null
+++ b/tools/testing/selftests/user_events/.gitignore
@@ -0,0 +1,4 @@
+abi_test
+dyn_test
+ftrace_test
+perf_test
diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c
index 8202f1327c39..cef1ff1af223 100644
--- a/tools/testing/selftests/user_events/abi_test.c
+++ b/tools/testing/selftests/user_events/abi_test.c
@@ -24,6 +24,18 @@
 const char *data_file = "/sys/kernel/tracing/user_events_data";
 const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable";
 
+static bool event_exists(void)
+{
+	int fd = open(enable_file, O_RDWR);
+
+	if (fd < 0)
+		return false;
+
+	close(fd);
+
+	return true;
+}
+
 static int change_event(bool enable)
 {
 	int fd = open(enable_file, O_RDWR);
@@ -47,7 +59,22 @@ static int change_event(bool enable)
 	return ret;
 }
 
-static int reg_enable(long *enable, int size, int bit)
+static int event_delete(void)
+{
+	int fd = open(data_file, O_RDWR);
+	int ret;
+
+	if (fd < 0)
+		return -1;
+
+	ret = ioctl(fd, DIAG_IOCSDEL, "__abi_event");
+
+	close(fd);
+
+	return ret;
+}
+
+static int reg_enable_flags(void *enable, int size, int bit, int flags)
 {
 	struct user_reg reg = {0};
 	int fd = open(data_file, O_RDWR);
@@ -58,6 +85,7 @@ static int reg_enable(long *enable, int size, int bit)
 
 	reg.size = sizeof(reg);
 	reg.name_args = (__u64)"__abi_event";
+	reg.flags = flags;
 	reg.enable_bit = bit;
 	reg.enable_addr = (__u64)enable;
 	reg.enable_size = size;
@@ -69,7 +97,12 @@ static int reg_enable(long *enable, int size, int bit)
 	return ret;
 }
 
-static int reg_disable(long *enable, int bit)
+static int reg_enable(void *enable, int size, int bit)
+{
+	return reg_enable_flags(enable, size, bit, 0);
+}
+
+static int reg_disable(void *enable, int bit)
 {
 	struct user_unreg reg = {0};
 	int fd = open(data_file, O_RDWR);
@@ -90,7 +123,8 @@ static int reg_disable(long *enable, int bit)
 }
 
 FIXTURE(user) {
-	long check;
+	int check;
+	long check_long;
 	bool umount;
 };
 
@@ -99,6 +133,7 @@ FIXTURE_SETUP(user) {
 
 	change_event(false);
 	self->check = 0;
+	self->check_long = 0;
 }
 
 FIXTURE_TEARDOWN(user) {
@@ -126,6 +161,26 @@ TEST_F(user, enablement) {
 	ASSERT_EQ(0, change_event(false));
 }
 
+TEST_F(user, flags) {
+	/* USER_EVENT_REG_PERSIST is allowed */
+	ASSERT_EQ(0, reg_enable_flags(&self->check, sizeof(int), 0,
+				      USER_EVENT_REG_PERSIST));
+	ASSERT_EQ(0, reg_disable(&self->check, 0));
+
+	/* Ensure it exists after close and disable */
+	ASSERT_TRUE(event_exists());
+
+	/* Ensure we can delete it */
+	ASSERT_EQ(0, event_delete());
+
+	/* USER_EVENT_REG_MAX or above is not allowed */
+	ASSERT_EQ(-1, reg_enable_flags(&self->check, sizeof(int), 0,
+				       USER_EVENT_REG_MAX));
+
+	/* Ensure it does not exist after invalid flags */
+	ASSERT_FALSE(event_exists());
+}
+
 TEST_F(user, bit_sizes) {
 	/* Allow 0-31 bits for 32-bit */
 	ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
@@ -136,9 +191,9 @@ TEST_F(user, bit_sizes) {
 
 #if BITS_PER_LONG == 8
 	/* Allow 0-64 bits for 64-bit */
-	ASSERT_EQ(0, reg_enable(&self->check, sizeof(long), 63));
-	ASSERT_NE(0, reg_enable(&self->check, sizeof(long), 64));
-	ASSERT_EQ(0, reg_disable(&self->check, 63));
+	ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63));
+	ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64));
+	ASSERT_EQ(0, reg_disable(&self->check_long, 63));
 #endif
 
 	/* Disallowed sizes (everything beside 4 and 8) */
@@ -200,7 +255,7 @@ static int clone_check(void *check)
 	for (i = 0; i < 10; ++i) {
 		usleep(100000);
 
-		if (*(long *)check)
+		if (*(int *)check)
 			return 0;
 	}
 
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index a85980190bea..bdf9ab127488 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -17,9 +17,25 @@
 #include "../kselftest_harness.h"
 #include "user_events_selftests.h"
 
+const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
 const char *abi_file = "/sys/kernel/tracing/user_events_data";
 const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
 
+static int event_delete(void)
+{
+	int fd = open(abi_file, O_RDWR);
+	int ret;
+
+	if (fd < 0)
+		return -1;
+
+	ret = ioctl(fd, DIAG_IOCSDEL, "__test_event");
+
+	close(fd);
+
+	return ret;
+}
+
 static bool wait_for_delete(void)
 {
 	int i;
@@ -64,7 +80,31 @@ static int unreg_event(int fd, int *check, int bit)
 	return ioctl(fd, DIAG_IOCSUNREG, &unreg);
 }
 
-static int parse(int *check, const char *value)
+static int parse_dyn(const char *value)
+{
+	int fd = open(dyn_file, O_RDWR | O_APPEND);
+	int len = strlen(value);
+	int ret;
+
+	if (fd == -1)
+		return -1;
+
+	ret = write(fd, value, len);
+
+	if (ret == len)
+		ret = 0;
+	else
+		ret = -1;
+
+	close(fd);
+
+	if (ret == 0)
+		event_delete();
+
+	return ret;
+}
+
+static int parse_abi(int *check, const char *value)
 {
 	int fd = open(abi_file, O_RDWR);
 	int ret;
@@ -90,6 +130,18 @@ static int parse(int *check, const char *value)
 	return ret;
 }
 
+static int parse(int *check, const char *value)
+{
+	int abi_ret = parse_abi(check, value);
+	int dyn_ret = parse_dyn(value);
+
+	/* Ensure both ABI and DYN parse the same way */
+	if (dyn_ret != abi_ret)
+		return -1;
+
+	return dyn_ret;
+}
+
 static int check_match(int *check, const char *first, const char *second, bool *match)
 {
 	int fd = open(abi_file, O_RDWR);
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 7e8c937627dd..0b872c0a42d2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -14,6 +14,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
 			check_initial_reg_state sigreturn iopl ioperm \
 			test_vsyscall mov_ss_trap \
 			syscall_arg_fault fsgsbase_restore sigaltstack
+TARGETS_C_BOTHBITS += nx_stack
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
@@ -109,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S
 # state.
 $(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
 $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+
+$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
+$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index eb0e46905bf9..8f9b06d9ce03 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -573,7 +573,7 @@ int do_uring(unsigned long lam)
 	char path[PATH_MAX] = {0};
 
 	/* get current process path */
-	if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+	if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
 		return 1;
 
 	int file_fd = open(path, O_RDONLY);
@@ -680,14 +680,14 @@ static int handle_execve(struct testcases *test)
 		perror("Fork failed.");
 		ret = 1;
 	} else if (pid == 0) {
-		char path[PATH_MAX];
+		char path[PATH_MAX] = {0};
 
 		/* Set LAM mode in parent process */
 		if (set_lam(lam) != 0)
 			return 1;
 
 		/* Get current binary's path and the binary was run by execve */
-		if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
 			exit(-1);
 
 		/* run binary to get LAM mode and return to parent process */
diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c
new file mode 100644
index 000000000000..ea4a4e246879
--- /dev/null
+++ b/tools/testing/selftests/x86/nx_stack.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack
+ * because I don't want to bother with PT_GNU_STACK detection.
+ *
+ * Fill the stack with INT3's and then try to execute some of them:
+ * SIGSEGV -- good, SIGTRAP -- bad.
+ *
+ * Regular stack is completely overwritten before testing.
+ * Test doesn't exit SIGSEGV handler after first fault at INT3.
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#define PAGE_SIZE 4096
+
+/*
+ * This is memset(rsp, 0xcc, -1); but down.
+ * It will SIGSEGV when bottom of the stack is reached.
+ * Byte-size access is important! (see rdi tweak in the signal handler).
+ */
+void make_stack1(void);
+asm(
+".pushsection .text\n"
+".globl make_stack1\n"
+".align 16\n"
+"make_stack1:\n"
+	"mov $0xcc, %al\n"
+#if defined __amd64__
+	"mov %rsp, %rdi\n"
+	"mov $-1, %rcx\n"
+#elif defined __i386__
+	"mov %esp, %edi\n"
+	"mov $-1, %ecx\n"
+#else
+#error
+#endif
+	"std\n"
+	"rep stosb\n"
+	/* unreachable */
+	"hlt\n"
+".type make_stack1,@function\n"
+".size make_stack1,.-make_stack1\n"
+".popsection\n"
+);
+
+/*
+ * memset(p, 0xcc, -1);
+ * It will SIGSEGV when top of the stack is reached.
+ */
+void make_stack2(uint64_t p);
+asm(
+".pushsection .text\n"
+".globl make_stack2\n"
+".align 16\n"
+"make_stack2:\n"
+	"mov $0xcc, %al\n"
+#if defined __amd64__
+	"mov $-1, %rcx\n"
+#elif defined __i386__
+	"mov $-1, %ecx\n"
+#else
+#error
+#endif
+	"cld\n"
+	"rep stosb\n"
+	/* unreachable */
+	"hlt\n"
+".type make_stack2,@function\n"
+".size make_stack2,.-make_stack2\n"
+".popsection\n"
+);
+
+static volatile int test_state = 0;
+static volatile unsigned long stack_min_addr;
+
+#if defined __amd64__
+#define RDI	REG_RDI
+#define RIP	REG_RIP
+#define RIP_STRING "rip"
+#elif defined __i386__
+#define RDI	REG_EDI
+#define RIP	REG_EIP
+#define RIP_STRING "eip"
+#else
+#error
+#endif
+
+static void sigsegv(int _, siginfo_t *__, void *uc_)
+{
+	/*
+	 * Some Linux versions didn't clear DF before entering signal
+	 * handler. make_stack1() doesn't have a chance to clear DF
+	 * either so we clear it by hand here.
+	 */
+	asm volatile ("cld" ::: "memory");
+
+	ucontext_t *uc = uc_;
+
+	if (test_state == 0) {
+		/* Stack is faulted and cleared from RSP to the lowest address. */
+		stack_min_addr = ++uc->uc_mcontext.gregs[RDI];
+		if (1) {
+			printf("stack min %lx\n", stack_min_addr);
+		}
+		uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2;
+		test_state = 1;
+	} else if (test_state == 1) {
+		/* Stack has been cleared from top to bottom. */
+		unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI];
+		if (1) {
+			printf("stack max %lx\n", stack_max_addr);
+		}
+		/* Start faulting pages on stack and see what happens. */
+		uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE;
+		test_state = 2;
+	} else if (test_state == 2) {
+		/* Stack page is NX -- good, test next page. */
+		uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE;
+		if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) {
+			/* One more SIGSEGV and test ends. */
+			test_state = 3;
+		}
+	} else {
+		printf("PASS\tAll stack pages are NX\n");
+		_exit(EXIT_SUCCESS);
+	}
+}
+
+static void sigtrap(int _, siginfo_t *__, void *uc_)
+{
+	const ucontext_t *uc = uc_;
+	unsigned long rip = uc->uc_mcontext.gregs[RIP];
+	printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip);
+	_exit(EXIT_FAILURE);
+}
+
+int main(void)
+{
+	{
+		struct sigaction act = {};
+		sigemptyset(&act.sa_mask);
+		act.sa_flags = SA_SIGINFO;
+		act.sa_sigaction = &sigsegv;
+		int rv = sigaction(SIGSEGV, &act, NULL);
+		assert(rv == 0);
+	}
+	{
+		struct sigaction act = {};
+		sigemptyset(&act.sa_mask);
+		act.sa_flags = SA_SIGINFO;
+		act.sa_sigaction = &sigtrap;
+		int rv = sigaction(SIGTRAP, &act, NULL);
+		assert(rv == 0);
+	}
+	{
+		struct rlimit rlim;
+		int rv = getrlimit(RLIMIT_STACK, &rlim);
+		assert(rv == 0);
+		/* Cap stack at time-honored 8 MiB value. */
+		rlim.rlim_max = rlim.rlim_cur;
+		if (rlim.rlim_max > 8 * 1024 * 1024) {
+			rlim.rlim_max = 8 * 1024 * 1024;
+		}
+		rv = setrlimit(RLIMIT_STACK, &rlim);
+		assert(rv == 0);
+	}
+	{
+		/*
+		 * We don't know now much stack SIGSEGV handler uses.
+		 * Bump this by 1 page every time someone complains,
+		 * or rewrite it in assembly.
+		 */
+		const size_t len = SIGSTKSZ;
+		void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		assert(p != MAP_FAILED);
+		stack_t ss = {};
+		ss.ss_sp = p;
+		ss.ss_size = len;
+		int rv = sigaltstack(&ss, NULL);
+		assert(rv == 0);
+	}
+	make_stack1();
+	/*
+	 * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere.
+	 * Fold it into main SIGTRAP pathway.
+	 */
+	__builtin_trap();
+}