248 files changed, 19757 insertions, 2563 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2ff68702fd41..1195bd85af38 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -249,10 +249,17 @@ else
 	$(error Error: set INSTALL_PATH to use install)
 endif
 
+FORMAT ?= .gz
+TAR_PATH = $(abspath ${INSTALL_PATH}/kselftest-packages/kselftest.tar${FORMAT})
+gen_tar: install
+	@mkdir -p ${INSTALL_PATH}/kselftest-packages/
+	@tar caf ${TAR_PATH} --exclude=kselftest-packages -C ${INSTALL_PATH} .
+	@echo "Created ${TAR_PATH}"
+
 clean:
 	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
 
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
+.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c30079c86998..1bb204cee853 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -30,8 +30,6 @@ test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
 test_sysctl
-test_hashmap
-test_btf_dump
 test_current_pid_tgid_new_ns
 xdping
 test_cpp
@@ -39,4 +37,5 @@ test_cpp
 /no_alu32
 /bpf_gcc
 /tools
-
+/runqslower
+/bench
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7729892e0b04..22aaec74ea0a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -2,6 +2,8 @@
 include ../../../../scripts/Kbuild.include
 include ../../../scripts/Makefile.arch
 
+CXX ?= $(CROSS_COMPILE)g++
+
 CURDIR := $(abspath .)
 TOOLSDIR := $(abspath ../../..)
 LIBDIR := $(TOOLSDIR)/lib
@@ -20,9 +22,10 @@ CLANG		?= clang
 LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR)		\
-	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)	\
-	  -I$(APIDIR)							\
+SAN_CFLAGS	?=
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS)		\
+	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
+	  -I$(TOOLSINCDIR) -I$(APIDIR)					\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
 	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lz -lrt -lpthread
@@ -32,7 +35,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
 	test_progs-no_alu32 \
 	test_current_pid_tgid_new_ns
 
@@ -76,7 +79,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower
+	test_lirc_mode2_user xdping test_cpp runqslower bench
 
 TEST_CUSTOM_PROGS = urandom_read
 
@@ -141,7 +144,8 @@ VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 $(OUTPUT)/runqslower: $(BPFOBJ)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
 		    OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
-		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
+		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	\
+		    cp $(SCRATCH_DIR)/runqslower $@
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
 
@@ -241,7 +245,7 @@ define GCC_BPF_BUILD_RULE
 	$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
 endef
 
-SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -263,6 +267,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST),	\
 					       $$(TRUNNER_BPF_SRCS)))
+TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
 # Evaluate rules now with extra TRUNNER_XXX variables above already defined
 $$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@@ -323,7 +328,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:			\
 		      $(TRUNNER_BPF_SKELS)				\
 		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
-	cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+	cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
 
 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		       %.c						\
@@ -352,6 +357,7 @@ endef
 TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
+			 network_helpers.c testing_helpers.c		\
 			 flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(wildcard progs/btf_dump_test_case_*.c)
@@ -403,6 +409,24 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
 	$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
 
+# Benchmark runner
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+	$(call msg,CC,,$@)
+	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
+$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+			    $(OUTPUT)/perfbuf_bench.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench: LDLIBS += -lm
+$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+		 $(OUTPUT)/bench_count.o \
+		 $(OUTPUT)/bench_rename.o \
+		 $(OUTPUT)/bench_trigger.o \
+		 $(OUTPUT)/bench_ringbufs.o
+	$(call msg,BINARY,,$@)
+	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
new file mode 100644
index 000000000000..e885d351595f
--- /dev/null
+++ b/tools/testing/selftests/bpf/README.rst
@@ -0,0 +1,45 @@
+==================
+BPF Selftest Notes
+==================
+General instructions on running selftests can be found in
+`Documentation/bpf/bpf_devel_QA.rst`_.
+
+Additional information about selftest failures are
+documented here.
+
+bpf_iter test failures with clang/llvm 10.0.0
+=============================================
+
+With clang/llvm 10.0.0, the following two bpf_iter tests failed:
+  * ``bpf_iter/ipv6_route``
+  * ``bpf_iter/netlink``
+
+The symptom for ``bpf_iter/ipv6_route`` looks like
+
+.. code-block:: c
+
+  2: (79) r8 = *(u64 *)(r1 +8)
+  ...
+  14: (bf) r2 = r8
+  15: (0f) r2 += r1
+  ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+  16: (7b) *(u64 *)(r8 +64) = r2
+  only read is supported
+
+The symptom for ``bpf_iter/netlink`` looks like
+
+.. code-block:: c
+
+  ; struct netlink_sock *nlk = ctx->sk;
+  2: (79) r7 = *(u64 *)(r1 +8)
+  ...
+  15: (bf) r2 = r7
+  16: (0f) r2 += r1
+  ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+  17: (7b) *(u64 *)(r7 +0) = r2
+  only read is supported
+
+This is due to a llvm BPF backend bug. The fix 
+  https://reviews.llvm.org/D78466
+has been pushed to llvm 10.x release branch and will be
+available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
new file mode 100644
index 000000000000..944ad4721c83
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <linux/compiler.h>
+#include <sys/time.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <sys/resource.h>
+#include <signal.h>
+#include "bench.h"
+#include "testing_helpers.h"
+
+struct env env = {
+	.warmup_sec = 1,
+	.duration_sec = 5,
+	.affinity = false,
+	.consumer_cnt = 1,
+	.producer_cnt = 1,
+};
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+		    const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static int bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur	= RLIM_INFINITY,
+		.rlim_max	= RLIM_INFINITY,
+	};
+
+	return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+void setup_libbpf()
+{
+	int err;
+
+	libbpf_set_print(libbpf_print_fn);
+
+	err = bump_memlock_rlimit();
+	if (err)
+		fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
+}
+
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	double hits_per_sec, drops_per_sec;
+	double hits_per_prod;
+
+	hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+	hits_per_prod = hits_per_sec / env.producer_cnt;
+	drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+
+	printf("Iter %3d (%7.3lfus): ",
+	       iter, (delta_ns - 1000000000) / 1000.0);
+
+	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
+	       hits_per_sec, hits_per_prod, drops_per_sec);
+}
+
+void hits_drops_report_final(struct bench_res res[], int res_cnt)
+{
+	int i;
+	double hits_mean = 0.0, drops_mean = 0.0;
+	double hits_stddev = 0.0, drops_stddev = 0.0;
+
+	for (i = 0; i < res_cnt; i++) {
+		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+		drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
+	}
+
+	if (res_cnt > 1)  {
+		for (i = 0; i < res_cnt; i++) {
+			hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+				       (hits_mean - res[i].hits / 1000000.0) /
+				       (res_cnt - 1.0);
+			drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
+					(drops_mean - res[i].drops / 1000000.0) /
+					(res_cnt - 1.0);
+		}
+		hits_stddev = sqrt(hits_stddev);
+		drops_stddev = sqrt(drops_stddev);
+	}
+	printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
+	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+	printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+	       drops_mean, drops_stddev);
+}
+
+const char *argp_program_version = "benchmark";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"benchmark    Generic benchmarking framework.\n"
+"\n"
+"This tool runs benchmarks.\n"
+"\n"
+"USAGE: benchmark <bench-name>\n"
+"\n"
+"EXAMPLES:\n"
+"    # run 'count-local' benchmark with 1 producer and 1 consumer\n"
+"    benchmark count-local\n"
+"    # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
+"    benchmark -p16 -c8 -a count-local\n";
+
+enum {
+	ARG_PROD_AFFINITY_SET = 1000,
+	ARG_CONS_AFFINITY_SET = 1001,
+};
+
+static const struct argp_option opts[] = {
+	{ "list", 'l', NULL, 0, "List available benchmarks"},
+	{ "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
+	{ "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
+	{ "producers", 'p', "NUM", 0, "Number of producer threads"},
+	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
+	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
+	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
+	  "Set of CPUs for producer threads; implies --affinity"},
+	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
+	  "Set of CPUs for consumer threads; implies --affinity"},
+	{},
+};
+
+extern struct argp bench_ringbufs_argp;
+
+static const struct argp_child bench_parsers[] = {
+	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+
+	switch (key) {
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'l':
+		env.list = true;
+		break;
+	case 'd':
+		env.duration_sec = strtol(arg, NULL, 10);
+		if (env.duration_sec <= 0) {
+			fprintf(stderr, "Invalid duration: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'w':
+		env.warmup_sec = strtol(arg, NULL, 10);
+		if (env.warmup_sec <= 0) {
+			fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'p':
+		env.producer_cnt = strtol(arg, NULL, 10);
+		if (env.producer_cnt <= 0) {
+			fprintf(stderr, "Invalid producer count: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'c':
+		env.consumer_cnt = strtol(arg, NULL, 10);
+		if (env.consumer_cnt <= 0) {
+			fprintf(stderr, "Invalid consumer count: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'a':
+		env.affinity = true;
+		break;
+	case ARG_PROD_AFFINITY_SET:
+		env.affinity = true;
+		if (parse_num_list(arg, &env.prod_cpus.cpus,
+				   &env.prod_cpus.cpus_len)) {
+			fprintf(stderr, "Invalid format of CPU set for producers.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_CONS_AFFINITY_SET:
+		env.affinity = true;
+		if (parse_num_list(arg, &env.cons_cpus.cpus,
+				   &env.cons_cpus.cpus_len)) {
+			fprintf(stderr, "Invalid format of CPU set for consumers.");
+			argp_usage(state);
+		}
+		break;
+	case ARGP_KEY_ARG:
+		if (pos_args++) {
+			fprintf(stderr,
+				"Unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		env.bench_name = strdup(arg);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static void parse_cmdline_args(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+		.children = bench_parsers,
+	};
+	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+		exit(1);
+	if (!env.list && !env.bench_name) {
+		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
+		exit(1);
+	}
+}
+
+static void collect_measurements(long delta_ns);
+
+static __u64 last_time_ns;
+static void sigalarm_handler(int signo)
+{
+	long new_time_ns = get_time_ns();
+	long delta_ns = new_time_ns - last_time_ns;
+
+	collect_measurements(delta_ns);
+
+	last_time_ns = new_time_ns;
+}
+
+/* set up periodic 1-second timer */
+static void setup_timer()
+{
+	static struct sigaction sigalarm_action = {
+		.sa_handler = sigalarm_handler,
+	};
+	struct itimerval timer_settings = {};
+	int err;
+
+	last_time_ns = get_time_ns();
+	err = sigaction(SIGALRM, &sigalarm_action, NULL);
+	if (err < 0) {
+		fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
+		exit(1);
+	}
+	timer_settings.it_interval.tv_sec = 1;
+	timer_settings.it_value.tv_sec = 1;
+	err = setitimer(ITIMER_REAL, &timer_settings, NULL);
+	if (err < 0) {
+		fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
+		exit(1);
+	}
+}
+
+static void set_thread_affinity(pthread_t thread, int cpu)
+{
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+		fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
+			cpu, errno);
+		exit(1);
+	}
+}
+
+static int next_cpu(struct cpu_set *cpu_set)
+{
+	if (cpu_set->cpus) {
+		int i;
+
+		/* find next available CPU */
+		for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
+			if (cpu_set->cpus[i]) {
+				cpu_set->next_cpu = i + 1;
+				return i;
+			}
+		}
+		fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
+		exit(1);
+	}
+
+	return cpu_set->next_cpu++;
+}
+
+static struct bench_state {
+	int res_cnt;
+	struct bench_res *results;
+	pthread_t *consumers;
+	pthread_t *producers;
+} state;
+
+const struct bench *bench = NULL;
+
+extern const struct bench bench_count_global;
+extern const struct bench bench_count_local;
+extern const struct bench bench_rename_base;
+extern const struct bench bench_rename_kprobe;
+extern const struct bench bench_rename_kretprobe;
+extern const struct bench bench_rename_rawtp;
+extern const struct bench bench_rename_fentry;
+extern const struct bench bench_rename_fexit;
+extern const struct bench bench_rename_fmodret;
+extern const struct bench bench_trig_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
+
+static const struct bench *benchs[] = {
+	&bench_count_global,
+	&bench_count_local,
+	&bench_rename_base,
+	&bench_rename_kprobe,
+	&bench_rename_kretprobe,
+	&bench_rename_rawtp,
+	&bench_rename_fentry,
+	&bench_rename_fexit,
+	&bench_rename_fmodret,
+	&bench_trig_base,
+	&bench_trig_tp,
+	&bench_trig_rawtp,
+	&bench_trig_kprobe,
+	&bench_trig_fentry,
+	&bench_trig_fmodret,
+	&bench_rb_libbpf,
+	&bench_rb_custom,
+	&bench_pb_libbpf,
+	&bench_pb_custom,
+};
+
+static void setup_benchmark()
+{
+	int i, err;
+
+	if (!env.bench_name) {
+		fprintf(stderr, "benchmark name is not specified\n");
+		exit(1);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
+			bench = benchs[i];
+			break;
+		}
+	}
+	if (!bench) {
+		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
+		exit(1);
+	}
+
+	printf("Setting up benchmark '%s'...\n", bench->name);
+
+	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
+	state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
+	state.results = calloc(env.duration_sec + env.warmup_sec + 2,
+			       sizeof(*state.results));
+	if (!state.producers || !state.consumers || !state.results)
+		exit(1);
+
+	if (bench->validate)
+		bench->validate();
+	if (bench->setup)
+		bench->setup();
+
+	for (i = 0; i < env.consumer_cnt; i++) {
+		err = pthread_create(&state.consumers[i], NULL,
+				     bench->consumer_thread, (void *)(long)i);
+		if (err) {
+			fprintf(stderr, "failed to create consumer thread #%d: %d\n",
+				i, -errno);
+			exit(1);
+		}
+		if (env.affinity)
+			set_thread_affinity(state.consumers[i],
+					    next_cpu(&env.cons_cpus));
+	}
+
+	/* unless explicit producer CPU list is specified, continue after
+	 * last consumer CPU
+	 */
+	if (!env.prod_cpus.cpus)
+		env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		err = pthread_create(&state.producers[i], NULL,
+				     bench->producer_thread, (void *)(long)i);
+		if (err) {
+			fprintf(stderr, "failed to create producer thread #%d: %d\n",
+				i, -errno);
+			exit(1);
+		}
+		if (env.affinity)
+			set_thread_affinity(state.producers[i],
+					    next_cpu(&env.prod_cpus));
+	}
+
+	printf("Benchmark '%s' started.\n", bench->name);
+}
+
+static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
+
+static void collect_measurements(long delta_ns) {
+	int iter = state.res_cnt++;
+	struct bench_res *res = &state.results[iter];
+
+	bench->measure(res);
+
+	if (bench->report_progress)
+		bench->report_progress(iter, res, delta_ns);
+
+	if (iter == env.duration_sec + env.warmup_sec) {
+		pthread_mutex_lock(&bench_done_mtx);
+		pthread_cond_signal(&bench_done);
+		pthread_mutex_unlock(&bench_done_mtx);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_cmdline_args(argc, argv);
+
+	if (env.list) {
+		int i;
+
+		printf("Available benchmarks:\n");
+		for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+			printf("- %s\n", benchs[i]->name);
+		}
+		return 0;
+	}
+
+	setup_benchmark();
+
+	setup_timer();
+
+	pthread_mutex_lock(&bench_done_mtx);
+	pthread_cond_wait(&bench_done, &bench_done_mtx);
+	pthread_mutex_unlock(&bench_done_mtx);
+
+	if (bench->report_final)
+		/* skip first sample */
+		bench->report_final(state.results + env.warmup_sec,
+				    state.res_cnt - env.warmup_sec);
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
new file mode 100644
index 000000000000..c1f48a473b02
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+#include <stdlib.h>
+#include <stdbool.h>
+#include <linux/err.h>
+#include <errno.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <math.h>
+#include <time.h>
+#include <sys/syscall.h>
+
+struct cpu_set {
+	bool *cpus;
+	int cpus_len;
+	int next_cpu;
+};
+
+struct env {
+	char *bench_name;
+	int duration_sec;
+	int warmup_sec;
+	bool verbose;
+	bool list;
+	bool affinity;
+	int consumer_cnt;
+	int producer_cnt;
+	struct cpu_set prod_cpus;
+	struct cpu_set cons_cpus;
+};
+
+struct bench_res {
+	long hits;
+	long drops;
+};
+
+struct bench {
+	const char *name;
+	void (*validate)();
+	void (*setup)();
+	void *(*producer_thread)(void *ctx);
+	void *(*consumer_thread)(void *ctx);
+	void (*measure)(struct bench_res* res);
+	void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
+	void (*report_final)(struct bench_res res[], int res_cnt);
+};
+
+struct counter {
+	long value;
+} __attribute__((aligned(128)));
+
+extern struct env env;
+extern const struct bench *bench;
+
+void setup_libbpf();
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void hits_drops_report_final(struct bench_res res[], int res_cnt);
+
+static inline __u64 get_time_ns() {
+	struct timespec t;
+
+	clock_gettime(CLOCK_MONOTONIC, &t);
+
+	return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static inline void atomic_inc(long *value)
+{
+	(void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_add(long *value, long n)
+{
+	(void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
+}
+
+static inline long atomic_swap(long *value, long n)
+{
+	return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
+}
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
new file mode 100644
index 000000000000..befba7a82643
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+
+/* COUNT-GLOBAL benchmark */
+
+static struct count_global_ctx {
+	struct counter hits;
+} count_global_ctx;
+
+static void *count_global_producer(void *input)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	while (true) {
+		atomic_inc(&ctx->hits.value);
+	}
+	return NULL;
+}
+
+static void *count_global_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_global_measure(struct bench_res *res)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	res->hits = atomic_swap(&ctx->hits.value, 0);
+}
+
+/* COUNT-local benchmark */
+
+static struct count_local_ctx {
+	struct counter *hits;
+} count_local_ctx;
+
+static void count_local_setup()
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+
+	ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+	if (!ctx->hits)
+		exit(1);
+}
+
+static void *count_local_producer(void *input)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int idx = (long)input;
+
+	while (true) {
+		atomic_inc(&ctx->hits[idx].value);
+	}
+	return NULL;
+}
+
+static void *count_local_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_local_measure(struct bench_res *res)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int i;
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		res->hits += atomic_swap(&ctx->hits[i].value, 0);
+	}
+}
+
+const struct bench bench_count_global = {
+	.name = "count-global",
+	.producer_thread = count_global_producer,
+	.consumer_thread = count_global_consumer,
+	.measure = count_global_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_count_local = {
+	.name = "count-local",
+	.setup = count_local_setup,
+	.producer_thread = count_local_producer,
+	.consumer_thread = count_local_consumer,
+	.measure = count_local_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
new file mode 100644
index 000000000000..e74cff40f4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <fcntl.h>
+#include "bench.h"
+#include "test_overhead.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct test_overhead *skel;
+	struct counter hits;
+	int fd;
+} ctx;
+
+static void validate()
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	char buf[] = "test_overhead";
+	int err;
+
+	while (true) {
+		err = write(ctx.fd, buf, sizeof(buf));
+		if (err < 0) {
+			fprintf(stderr, "write failed\n");
+			exit(1);
+		}
+		atomic_inc(&ctx.hits.value);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.hits.value, 0);
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+
+	ctx.skel = test_overhead__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (ctx.fd < 0) {
+		fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
+		exit(1);
+	}
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void setup_base()
+{
+	setup_ctx();
+}
+
+static void setup_kprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog1);
+}
+
+static void setup_kretprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog2);
+}
+
+static void setup_rawtp()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog3);
+}
+
+static void setup_fentry()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog4);
+}
+
+static void setup_fexit()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog5);
+}
+
+static void setup_fmodret()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog6);
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_rename_base = {
+	.name = "rename-base",
+	.validate = validate,
+	.setup = setup_base,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kprobe = {
+	.name = "rename-kprobe",
+	.validate = validate,
+	.setup = setup_kprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kretprobe = {
+	.name = "rename-kretprobe",
+	.validate = validate,
+	.setup = setup_kretprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_rawtp = {
+	.name = "rename-rawtp",
+	.validate = validate,
+	.setup = setup_rawtp,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fentry = {
+	.name = "rename-fentry",
+	.validate = validate,
+	.setup = setup_fentry,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fexit = {
+	.name = "rename-fexit",
+	.validate = validate,
+	.setup = setup_fexit,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fmodret = {
+	.name = "rename-fmodret",
+	.validate = validate,
+	.setup = setup_fmodret,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000000..da87c7f31891
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+	bool back2back;
+	int batch_cnt;
+	bool sampled;
+	int sample_rate;
+	int ringbuf_sz; /* per-ringbuf, in bytes */
+	bool ringbuf_use_output; /* use slower output API */
+	int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+	.back2back = false,
+	.batch_cnt = 500,
+	.sampled = false,
+	.sample_rate = 500,
+	.ringbuf_sz = 512 * 1024,
+	.ringbuf_use_output = false,
+	.perfbuf_sz = 128,
+};
+
+enum {
+	ARG_RB_BACK2BACK = 2000,
+	ARG_RB_USE_OUTPUT = 2001,
+	ARG_RB_BATCH_CNT = 2002,
+	ARG_RB_SAMPLED = 2003,
+	ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_RB_BACK2BACK:
+		args.back2back = true;
+		break;
+	case ARG_RB_USE_OUTPUT:
+		args.ringbuf_use_output = true;
+		break;
+	case ARG_RB_BATCH_CNT:
+		args.batch_cnt = strtol(arg, NULL, 10);
+		if (args.batch_cnt < 0) {
+			fprintf(stderr, "Invalid batch count.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_RB_SAMPLED:
+		args.sampled = true;
+		break;
+	case ARG_RB_SAMPLE_RATE:
+		args.sample_rate = strtol(arg, NULL, 10);
+		if (args.sample_rate < 0) {
+			fprintf(stderr, "Invalid perfbuf sample rate.");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+	(void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.back2back && env.producer_cnt > 1) {
+		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+		exit(1);
+	}
+}
+
+static void *bufs_sample_producer(void *input)
+{
+	if (args.back2back) {
+		/* initial batch to get everything started */
+		bufs_trigger_batch();
+		return NULL;
+	}
+
+	while (true)
+		bufs_trigger_batch();
+	return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+	struct ringbuf_bench *skel;
+	struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+	struct ringbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = ringbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+	if (args.sampled)
+		/* record data + header take 16 bytes */
+		skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+	bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+	if (ringbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+	atomic_inc(&buf_hits.value);
+	return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+	struct bpf_link *link;
+
+	ctx->skel = ringbuf_setup_skeleton();
+	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+					buf_process_sample, NULL, NULL);
+	if (!ctx->ringbuf) {
+		fprintf(stderr, "failed to create ringbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+	__u64 *consumer_pos;
+	__u64 *producer_pos;
+	__u64 mask;
+	void *data;
+	int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+	struct ringbuf_bench *skel;
+	struct ringbuf_custom ringbuf;
+	int epoll_fd;
+	struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	const size_t page_size = getpagesize();
+	struct bpf_link *link;
+	struct ringbuf_custom *r;
+	void *tmp;
+	int err;
+
+	ctx->skel = ringbuf_setup_skeleton();
+
+	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (ctx->epoll_fd < 0) {
+		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+		exit(1);
+	}
+
+	r = &ctx->ringbuf;
+	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+	r->mask = args.ringbuf_sz - 1;
+
+	/* Map writable consumer page */
+	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   r->map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+		exit(1);
+	}
+	r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. */
+	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+		   r->map_fd, page_size);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+		exit(1);
+	}
+	r->producer_pos = tmp;
+	r->data = tmp + page_size;
+
+	ctx->event.events = EPOLLIN;
+	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+	if (err < 0) {
+		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+#define RINGBUF_BUSY_BIT (1 << 31)
+#define RINGBUF_DISCARD_BIT (1 << 30)
+#define RINGBUF_META_LEN 8
+
+static inline int roundup_len(__u32 len)
+{
+	/* clear out top 2 bits */
+	len <<= 2;
+	len >>= 2;
+	/* add length prefix */
+	len += RINGBUF_META_LEN;
+	/* round up to 8 byte alignment */
+	return (len + 7) / 8 * 8;
+}
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+	unsigned long cons_pos, prod_pos;
+	int *len_ptr, len;
+	bool got_new_data;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	while (true) {
+		got_new_data = false;
+		prod_pos = smp_load_acquire(r->producer_pos);
+		while (cons_pos < prod_pos) {
+			len_ptr = r->data + (cons_pos & r->mask);
+			len = smp_load_acquire(len_ptr);
+
+			/* sample not committed yet, bail out for now */
+			if (len & RINGBUF_BUSY_BIT)
+				return;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			atomic_inc(&buf_hits.value);
+		}
+		if (got_new_data)
+			smp_store_release(r->consumer_pos, cons_pos);
+		else
+			break;
+	};
+}
+
+static void *ringbuf_custom_consumer(void *input)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	int cnt;
+
+	do {
+		if (args.back2back)
+			bufs_trigger_batch();
+		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+		if (cnt > 0)
+			ringbuf_custom_process_ring(&ctx->ringbuf);
+	} while (cnt >= 0);
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return 0;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+	struct perfbuf_bench *skel;
+	struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+	struct perfbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = perfbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+
+	if (perfbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+			   struct perf_event_header *e)
+{
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE:
+		atomic_inc(&buf_hits.value);
+		break;
+	case PERF_RECORD_LOST:
+		break;
+	default:
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_event_attr attr;
+	struct perf_buffer_raw_opts pb_opts = {
+		.event_cb = perfbuf_process_sample_raw,
+		.ctx = (void *)(long)0,
+		.attr = &attr,
+	};
+	struct bpf_link *link;
+
+	ctx->skel = perfbuf_setup_skeleton();
+
+	memset(&attr, 0, sizeof(attr));
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	/* notify only every Nth sample */
+	if (args.sampled) {
+		attr.sample_period = args.sample_rate;
+		attr.wakeup_events = args.sample_rate;
+	} else {
+		attr.sample_period = 1;
+		attr.wakeup_events = 1;
+	}
+
+	if (args.sample_rate > args.batch_cnt) {
+		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+			args.sample_rate, args.batch_cnt);
+		exit(1);
+	}
+
+	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+					    args.perfbuf_sz, &pb_opts);
+	if (!ctx->perfbuf) {
+		fprintf(stderr, "failed to create perfbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "perfbuf polling failed!\n");
+	return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt; /* number of allocated CPU buffers */
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_buffer *pb = ctx->perfbuf;
+	struct perf_cpu_buf *cpu_buf;
+	struct perf_event_mmap_page *header;
+	size_t mmap_mask = pb->mmap_size - 1;
+	struct perf_event_header *ehdr;
+	__u64 data_head, data_tail;
+	size_t ehdr_size;
+	void *base;
+	int i, cnt;
+
+	while (true) {
+		if (args.back2back)
+			bufs_trigger_batch();
+		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+		if (cnt <= 0) {
+			fprintf(stderr, "perf epoll failed: %d\n", -errno);
+			exit(1);
+		}
+
+		for (i = 0; i < cnt; ++i) {
+			cpu_buf = pb->events[i].data.ptr;
+			header = cpu_buf->base;
+			base = ((void *)header) + pb->page_size;
+
+			data_head = ring_buffer_read_head(header);
+			data_tail = header->data_tail;
+			while (data_head != data_tail) {
+				ehdr = base + (data_tail & mmap_mask);
+				ehdr_size = ehdr->size;
+
+				if (ehdr->type == PERF_RECORD_SAMPLE)
+					atomic_inc(&buf_hits.value);
+
+				data_tail += ehdr_size;
+			}
+			ring_buffer_write_tail(header, data_tail);
+		}
+	}
+	return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+	.name = "rb-libbpf",
+	.validate = bufs_validate,
+	.setup = ringbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_libbpf_consumer,
+	.measure = ringbuf_libbpf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+	.name = "rb-custom",
+	.validate = bufs_validate,
+	.setup = ringbuf_custom_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_custom_consumer,
+	.measure = ringbuf_custom_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+	.name = "pb-libbpf",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_libbpf_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+	.name = "pb-custom",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_custom_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
new file mode 100644
index 000000000000..49c22832f216
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+#include "trigger_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct trigger_ctx {
+	struct trigger_bench *skel;
+} ctx;
+
+static struct counter base_hits;
+
+static void trigger_validate()
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *trigger_base_producer(void *input)
+{
+	while (true) {
+		(void)syscall(__NR_getpgid);
+		atomic_inc(&base_hits.value);
+	}
+	return NULL;
+}
+
+static void trigger_base_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&base_hits.value, 0);
+}
+
+static void *trigger_producer(void *input)
+{
+	while (true)
+		(void)syscall(__NR_getpgid);
+	return NULL;
+}
+
+static void trigger_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+
+	ctx.skel = trigger_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void trigger_tp_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+}
+
+static void trigger_kprobe_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
+}
+
+static void trigger_fentry_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
+}
+
+static void trigger_fmodret_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+}
+
+static void *trigger_consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_trig_base = {
+	.name = "trig-base",
+	.validate = trigger_validate,
+	.producer_thread = trigger_base_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_base_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_tp = {
+	.name = "trig-tp",
+	.validate = trigger_validate,
+	.setup = trigger_tp_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_rawtp = {
+	.name = "trig-rawtp",
+	.validate = trigger_validate,
+	.setup = trigger_rawtp_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe = {
+	.name = "trig-kprobe",
+	.validate = trigger_validate,
+	.setup = trigger_kprobe_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry = {
+	.name = "trig-fentry",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fmodret = {
+	.name = "trig-fmodret",
+	.validate = trigger_validate,
+	.setup = trigger_fmodret_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
new file mode 100755
index 000000000000..16f774b1cdbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base kprobe kretprobe rawtp fentry fexit fmodret
+do
+	summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000000..af4aa04caba6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+	local len=${#1}
+
+	printf "\n%s\n" "$1"
+	for i in $(seq 1 $len); do printf '='; done
+	printf '\n'
+}
+
+function summarize()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-b2b $b)"
+	summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b                 rb-custom)"
+summarize "output"  "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled                 rb-custom)"
+summarize "output-sampled"  "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
new file mode 100755
index 000000000000..78e83f243294
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base tp rawtp kprobe fentry fmodret
+do
+	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 60e3ae5d4e48..2118e23ac07a 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,6 +25,7 @@ CONFIG_XDP_SOCKETS=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_IPV6_TUNNEL=y
 CONFIG_IPV6_GRE=y
+CONFIG_IPV6_SEG6_BPF=y
 CONFIG_NET_FOU=m
 CONFIG_NET_FOU_IP_TUNNELS=y
 CONFIG_IPV6_FOU=m
@@ -37,3 +38,4 @@ CONFIG_IPV6_SIT=m
 CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
+CONFIG_LIRC=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
new file mode 100644
index 000000000000..e36dd1a1780d
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+
+#include <sys/epoll.h>
+
+#include <linux/err.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#include "bpf_util.h"
+#include "network_helpers.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+struct ipv4_packet pkt_v4 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+	.iph.ihl = 5,
+	.iph.protocol = IPPROTO_TCP,
+	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+	.tcp.doff = 5,
+};
+
+struct ipv6_packet pkt_v6 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.iph.nexthdr = IPPROTO_TCP,
+	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+	.tcp.doff = 5,
+};
+
+int start_server_with_port(int family, int type, __u16 port)
+{
+	struct sockaddr_storage addr = {};
+	socklen_t len;
+	int fd;
+
+	if (family == AF_INET) {
+		struct sockaddr_in *sin = (void *)&addr;
+
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(port);
+		len = sizeof(*sin);
+	} else {
+		struct sockaddr_in6 *sin6 = (void *)&addr;
+
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(port);
+		len = sizeof(*sin6);
+	}
+
+	fd = socket(family, type | SOCK_NONBLOCK, 0);
+	if (fd < 0) {
+		log_err("Failed to create server socket");
+		return -1;
+	}
+
+	if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Failed to bind socket");
+		close(fd);
+		return -1;
+	}
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 1) < 0) {
+			log_err("Failed to listed on socket");
+			close(fd);
+			return -1;
+		}
+	}
+
+	return fd;
+}
+
+int start_server(int family, int type)
+{
+	return start_server_with_port(family, type, 0);
+}
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+
+int connect_to_fd(int family, int type, int server_fd)
+{
+	int fd, save_errno;
+
+	fd = socket(family, type, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
+		save_errno = errno;
+		close(fd);
+		errno = save_errno;
+		return -1;
+	}
+
+	return fd;
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int save_errno;
+
+	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+		       timeo_optlen)) {
+		log_err("Failed to set SO_RCVTIMEO");
+		return -1;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		return -1;
+	}
+
+	if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
+		if (errno != EINPROGRESS) {
+			save_errno = errno;
+			log_err("Failed to connect to server");
+			errno = save_errno;
+		}
+		return -1;
+	}
+
+	return 0;
+}
+
+int connect_wait(int fd)
+{
+	struct epoll_event ev = {}, events[2];
+	int timeout_ms = 1000;
+	int efd, nfd;
+
+	efd = epoll_create1(EPOLL_CLOEXEC);
+	if (efd < 0) {
+		log_err("Failed to open epoll fd");
+		return -1;
+	}
+
+	ev.events = EPOLLRDHUP | EPOLLOUT;
+	ev.data.fd = fd;
+
+	if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+		log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
+		close(efd);
+		return -1;
+	}
+
+	nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
+	if (nfd < 0)
+		log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+
+	close(efd);
+	return nfd;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
new file mode 100644
index 000000000000..6a8009605670
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETWORK_HELPERS_H
+#define __NETWORK_HELPERS_H
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/tcp.h>
+#include <bpf/bpf_endian.h>
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+struct ipv4_packet {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct tcphdr tcp;
+} __packed;
+extern struct ipv4_packet pkt_v4;
+
+/* ipv6 test vector */
+struct ipv6_packet {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct tcphdr tcp;
+} __packed;
+extern struct ipv6_packet pkt_v6;
+
+int start_server(int family, int type);
+int start_server_with_port(int family, int type, __u16 port);
+int connect_to_fd(int family, int type, int server_fd);
+int connect_fd_to_fd(int client_fd, int server_fd);
+int connect_wait(int client_fd);
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 0262f7b374f9..c548aded6585 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -1,24 +1,5 @@
-#include <asm/types.h>
-#include <linux/types.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-#include <linux/unistd.h>
-#include <linux/filter.h>
-#include <linux/bpf_perf_event.h>
-#include <linux/bpf.h>
-
-#include <bpf/bpf.h>
-
-#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
 
 #define MAX_INSNS	512
 #define MAX_MATCHES	16
@@ -359,15 +340,15 @@ static struct bpf_align_test tests[] = {
 			 * is still (4n), fixed offset is not changed.
 			 * Also, we create a new reg->id.
 			 */
-			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
 			 * which is 20.  Then the variable offset is (4n), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
-			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
 		},
 	},
 	{
@@ -410,15 +391,15 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* Packet pointer has (4n+2) offset */
-			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
-			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
 			/* Newly read value in R6 was shifted left by 2, so has
 			 * known alignment of 4.
 			 */
@@ -426,15 +407,15 @@ static struct bpf_align_test tests[] = {
 			/* Added (4n) to packet pointer's (4n+2) var_off, giving
 			 * another (4n+2).
 			 */
-			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
-			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
 		},
 	},
 	{
@@ -469,16 +450,16 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
 			/* (ptr - ptr) << 2 == unknown, (4n) */
-			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -486,7 +467,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 		}
 	},
 	{
@@ -528,7 +509,7 @@ static struct bpf_align_test tests[] = {
 			/* New unknown value in R7 is (4n) */
 			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Subtracting it from R6 blows our unsigned bounds */
-			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>= 0 */
 			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
@@ -537,7 +518,8 @@ static struct bpf_align_test tests[] = {
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+
 		},
 	},
 	{
@@ -579,18 +561,18 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
 			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 		},
 	},
 };
@@ -669,51 +651,16 @@ static int do_test_single(struct bpf_align_test *test)
 	return ret;
 }
 
-static int do_test(unsigned int from, unsigned int to)
+void test_align(void)
 {
-	int all_pass = 0;
-	int all_fail = 0;
 	unsigned int i;
 
-	for (i = from; i < to; i++) {
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_align_test *test = &tests[i];
-		int fail;
 
-		printf("Test %3d: %s ... ",
-		       i, test->descr);
-		fail = do_test_single(test);
-		if (fail) {
-			all_fail++;
-			printf("FAIL\n");
-		} else {
-			all_pass++;
-			printf("PASS\n");
-		}
-	}
-	printf("Results: %d pass %d fail\n",
-	       all_pass, all_fail);
-	return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
-}
+		if (!test__start_subtest(test->descr))
+			continue;
 
-int main(int argc, char **argv)
-{
-	unsigned int from = 0, to = ARRAY_SIZE(tests);
-
-	if (argc == 3) {
-		unsigned int l = atoi(argv[argc - 2]);
-		unsigned int u = atoi(argv[argc - 1]);
-
-		if (l < to && u < to) {
-			from = l;
-			to   = u + 1;
-		}
-	} else if (argc == 2) {
-		unsigned int t = atoi(argv[argc - 1]);
-
-		if (t < to) {
-			from = t;
-			to   = t + 1;
-		}
+		CHECK_FAIL(do_test_single(test));
 	}
-	return do_test(from, to);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
new file mode 100644
index 000000000000..87c29dde1cf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "bpf_iter_ipv6_route.skel.h"
+#include "bpf_iter_netlink.skel.h"
+#include "bpf_iter_bpf_map.skel.h"
+#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_test_kern1.skel.h"
+#include "bpf_iter_test_kern2.skel.h"
+#include "bpf_iter_test_kern3.skel.h"
+#include "bpf_iter_test_kern4.skel.h"
+
+static int duration;
+
+static void test_btf_id_or_null(void)
+{
+	struct bpf_iter_test_kern3 *skel;
+
+	skel = bpf_iter_test_kern3__open_and_load();
+	if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
+		  "skeleton open_and_load unexpectedly succeeded\n")) {
+		bpf_iter_test_kern3__destroy(skel);
+		return;
+	}
+}
+
+static void do_dummy_read(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+	char buf[16] = {};
+	int iter_fd, len;
+
+	link = bpf_program__attach_iter(prog, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		return;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* not check contents, but ensure read() ends without error */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+
+	close(iter_fd);
+
+free_link:
+	bpf_link__destroy(link);
+}
+
+static void test_ipv6_route(void)
+{
+	struct bpf_iter_ipv6_route *skel;
+
+	skel = bpf_iter_ipv6_route__open_and_load();
+	if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_ipv6_route);
+
+	bpf_iter_ipv6_route__destroy(skel);
+}
+
+static void test_netlink(void)
+{
+	struct bpf_iter_netlink *skel;
+
+	skel = bpf_iter_netlink__open_and_load();
+	if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_netlink);
+
+	bpf_iter_netlink__destroy(skel);
+}
+
+static void test_bpf_map(void)
+{
+	struct bpf_iter_bpf_map *skel;
+
+	skel = bpf_iter_bpf_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_bpf_map);
+
+	bpf_iter_bpf_map__destroy(skel);
+}
+
+static void test_task(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task);
+
+	bpf_iter_task__destroy(skel);
+}
+
+static void test_task_file(void)
+{
+	struct bpf_iter_task_file *skel;
+
+	skel = bpf_iter_task_file__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_file);
+
+	bpf_iter_task_file__destroy(skel);
+}
+
+/* The expected string is less than 16 bytes */
+static int do_read_with_fd(int iter_fd, const char *expected,
+			   bool read_one_char)
+{
+	int err = -1, len, read_buf_len, start;
+	char buf[16] = {};
+
+	read_buf_len = read_one_char ? 1 : 16;
+	start = 0;
+	while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
+		start += len;
+		if (CHECK(start >= 16, "read", "read len %d\n", len))
+			return -1;
+		read_buf_len = read_one_char ? 1 : 16 - start;
+	}
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		return -1;
+
+	err = strcmp(buf, expected);
+	if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
+		  buf, expected))
+		return -1;
+
+	return 0;
+}
+
+static void test_anon_iter(bool read_one_char)
+{
+	struct bpf_iter_test_kern1 *skel;
+	struct bpf_link *link;
+	int iter_fd, err;
+
+	skel = bpf_iter_test_kern1__open_and_load();
+	if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	err = bpf_iter_test_kern1__attach(skel);
+	if (CHECK(err, "bpf_iter_test_kern1__attach",
+		  "skeleton attach failed\n")) {
+		goto out;
+	}
+
+	link = skel->links.dump_task;
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto out;
+
+	do_read_with_fd(iter_fd, "abcd", read_one_char);
+	close(iter_fd);
+
+out:
+	bpf_iter_test_kern1__destroy(skel);
+}
+
+static int do_read(const char *path, const char *expected)
+{
+	int err, iter_fd;
+
+	iter_fd = open(path, O_RDONLY);
+	if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
+		  path, strerror(errno)))
+		return -1;
+
+	err = do_read_with_fd(iter_fd, expected, false);
+	close(iter_fd);
+	return err;
+}
+
+static void test_file_iter(void)
+{
+	const char *path = "/sys/fs/bpf/bpf_iter_test1";
+	struct bpf_iter_test_kern1 *skel1;
+	struct bpf_iter_test_kern2 *skel2;
+	struct bpf_link *link;
+	int err;
+
+	skel1 = bpf_iter_test_kern1__open_and_load();
+	if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	/* unlink this path if it exists. */
+	unlink(path);
+
+	err = bpf_link__pin(link, path);
+	if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+		goto free_link;
+
+	err = do_read(path, "abcd");
+	if (err)
+		goto unlink_path;
+
+	/* file based iterator seems working fine. Let us a link update
+	 * of the underlying link and `cat` the iterator again, its content
+	 * should change.
+	 */
+	skel2 = bpf_iter_test_kern2__open_and_load();
+	if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		goto unlink_path;
+
+	err = bpf_link__update_program(link, skel2->progs.dump_task);
+	if (CHECK(err, "update_prog", "update_prog failed\n"))
+		goto destroy_skel2;
+
+	do_read(path, "ABCD");
+
+destroy_skel2:
+	bpf_iter_test_kern2__destroy(skel2);
+unlink_path:
+	unlink(path);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_test_kern1__destroy(skel1);
+}
+
+static void test_overflow(bool test_e2big_overflow, bool ret1)
+{
+	__u32 map_info_len, total_read_len, expected_read_len;
+	int err, iter_fd, map1_fd, map2_fd, len;
+	struct bpf_map_info map_info = {};
+	struct bpf_iter_test_kern4 *skel;
+	struct bpf_link *link;
+	__u32 page_size;
+	char *buf;
+
+	skel = bpf_iter_test_kern4__open();
+	if (CHECK(!skel, "bpf_iter_test_kern4__open",
+		  "skeleton open failed\n"))
+		return;
+
+	/* create two maps: bpf program will only do bpf_seq_write
+	 * for these two maps. The goal is one map output almost
+	 * fills seq_file buffer and then the other will trigger
+	 * overflow and needs restart.
+	 */
+	map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (CHECK(map1_fd < 0, "bpf_create_map",
+		  "map_creation failed: %s\n", strerror(errno)))
+		goto out;
+	map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (CHECK(map2_fd < 0, "bpf_create_map",
+		  "map_creation failed: %s\n", strerror(errno)))
+		goto free_map1;
+
+	/* bpf_seq_printf kernel buffer is one page, so one map
+	 * bpf_seq_write will mostly fill it, and the other map
+	 * will partially fill and then trigger overflow and need
+	 * bpf_seq_read restart.
+	 */
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	if (test_e2big_overflow) {
+		skel->rodata->print_len = (page_size + 8) / 8;
+		expected_read_len = 2 * (page_size + 8);
+	} else if (!ret1) {
+		skel->rodata->print_len = (page_size - 8) / 8;
+		expected_read_len = 2 * (page_size - 8);
+	} else {
+		skel->rodata->print_len = 1;
+		expected_read_len = 2 * 8;
+	}
+	skel->rodata->ret1 = ret1;
+
+	if (CHECK(bpf_iter_test_kern4__load(skel),
+		  "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+		goto free_map2;
+
+	/* setup filtering map_id in bpf program */
+	map_info_len = sizeof(map_info);
+	err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+		  strerror(errno)))
+		goto free_map2;
+	skel->bss->map1_id = map_info.id;
+
+	err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+		  strerror(errno)))
+		goto free_map2;
+	skel->bss->map2_id = map_info.id;
+
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto free_map2;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	buf = malloc(expected_read_len);
+	if (!buf)
+		goto close_iter;
+
+	/* do read */
+	total_read_len = 0;
+	if (test_e2big_overflow) {
+		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+			total_read_len += len;
+
+		CHECK(len != -1 || errno != E2BIG, "read",
+		      "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
+			  len, strerror(errno));
+		goto free_buf;
+	} else if (!ret1) {
+		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+			total_read_len += len;
+
+		if (CHECK(len < 0, "read", "read failed: %s\n",
+			  strerror(errno)))
+			goto free_buf;
+	} else {
+		do {
+			len = read(iter_fd, buf, expected_read_len);
+			if (len > 0)
+				total_read_len += len;
+		} while (len > 0 || len == -EAGAIN);
+
+		if (CHECK(len < 0, "read", "read failed: %s\n",
+			  strerror(errno)))
+			goto free_buf;
+	}
+
+	if (CHECK(total_read_len != expected_read_len, "read",
+		  "total len %u, expected len %u\n", total_read_len,
+		  expected_read_len))
+		goto free_buf;
+
+	if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
+		  "expected 1 actual %d\n", skel->bss->map1_accessed))
+		goto free_buf;
+
+	if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
+		  "expected 2 actual %d\n", skel->bss->map2_accessed))
+		goto free_buf;
+
+	CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
+	      "map2_seqnum", "two different seqnum %lld %lld\n",
+	      skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+
+free_buf:
+	free(buf);
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+free_map2:
+	close(map2_fd);
+free_map1:
+	close(map1_fd);
+out:
+	bpf_iter_test_kern4__destroy(skel);
+}
+
+void test_bpf_iter(void)
+{
+	if (test__start_subtest("btf_id_or_null"))
+		test_btf_id_or_null();
+	if (test__start_subtest("ipv6_route"))
+		test_ipv6_route();
+	if (test__start_subtest("netlink"))
+		test_netlink();
+	if (test__start_subtest("bpf_map"))
+		test_bpf_map();
+	if (test__start_subtest("task"))
+		test_task();
+	if (test__start_subtest("task_file"))
+		test_task_file();
+	if (test__start_subtest("anon"))
+		test_anon_iter(false);
+	if (test__start_subtest("anon-read-one-char"))
+		test_anon_iter(true);
+	if (test__start_subtest("file"))
+		test_file_iter();
+	if (test__start_subtest("overflow"))
+		test_overflow(false, false);
+	if (test__start_subtest("overflow-e2big"))
+		test_overflow(true, false);
+	if (test__start_subtest("prog-ret-1"))
+		test_overflow(false, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index f10029821e16..7afa4160416f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -1,26 +1,30 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+#define nr_iters 2
+
 void test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
-	const int nr_iters = 2;
 	const char *file = "./test_obj_id.o";
 	const char *expected_prog_name = "test_obj_id";
 	const char *expected_map_name = "test_map_id";
 	const __u64 nsec_per_sec = 1000000000;
 
-	struct bpf_object *objs[nr_iters];
+	struct bpf_object *objs[nr_iters] = {};
+	struct bpf_link *links[nr_iters] = {};
+	struct bpf_program *prog;
 	int prog_fds[nr_iters], map_fds[nr_iters];
 	/* +1 to test for the info_len returned by kernel */
 	struct bpf_prog_info prog_infos[nr_iters + 1];
 	struct bpf_map_info map_infos[nr_iters + 1];
+	struct bpf_link_info link_infos[nr_iters + 1];
 	/* Each prog only uses one map. +1 to test nr_map_ids
 	 * returned by kernel.
 	 */
 	__u32 map_ids[nr_iters + 1];
-	char jited_insns[128], xlated_insns[128], zeros[128];
+	char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
 	__u32 i, next_id, info_len, nr_id_found, duration = 0;
 	struct timespec real_time_ts, boot_time_ts;
 	int err = 0;
@@ -36,14 +40,15 @@ void test_bpf_obj_id(void)
 	CHECK(err >= 0 || errno != ENOENT,
 	      "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
 
-	for (i = 0; i < nr_iters; i++)
-		objs[i] = NULL;
+	err = bpf_link_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
 
 	/* Check bpf_obj_get_info_by_fd() */
 	bzero(zeros, sizeof(zeros));
 	for (i = 0; i < nr_iters; i++) {
 		now = time(NULL);
-		err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+		err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
 				    &objs[i], &prog_fds[i]);
 		/* test_obj_id.o is a dumb prog. It should never fail
 		 * to load.
@@ -60,6 +65,17 @@ void test_bpf_obj_id(void)
 		if (CHECK_FAIL(err))
 			goto done;
 
+		prog = bpf_object__find_program_by_title(objs[i],
+							 "raw_tp/sys_enter");
+		if (CHECK_FAIL(!prog))
+			goto done;
+		links[i] = bpf_program__attach(prog);
+		err = libbpf_get_error(links[i]);
+		if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+			links[i] = NULL;
+			goto done;
+		}
+
 		/* Check getting map info */
 		info_len = sizeof(struct bpf_map_info) * 2;
 		bzero(&map_infos[i], info_len);
@@ -107,7 +123,7 @@ void test_bpf_obj_id(void)
 		load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
 			+ (prog_infos[i].load_time / nsec_per_sec);
 		if (CHECK(err ||
-			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+			  prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
 			  info_len != sizeof(struct bpf_prog_info) ||
 			  (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
 			  (env.jit_enabled &&
@@ -120,7 +136,11 @@ void test_bpf_obj_id(void)
 			  *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
 			  strcmp((char *)prog_infos[i].name, expected_prog_name),
 			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
+			  "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
+			  "jited_prog %d xlated_prog %d load_time %lu(%lu) "
+			  "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
+			  "name %s(%s)\n",
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
@@ -135,6 +155,33 @@ void test_bpf_obj_id(void)
 			  *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
 			  prog_infos[i].name, expected_prog_name))
 			goto done;
+
+		/* Check getting link info */
+		info_len = sizeof(struct bpf_link_info) * 2;
+		bzero(&link_infos[i], info_len);
+		link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+		link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
+		err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
+					     &link_infos[i], &info_len);
+		if (CHECK(err ||
+			  link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
+			  link_infos[i].prog_id != prog_infos[i].id ||
+			  link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
+			  strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
+				 "sys_enter") ||
+			  info_len != sizeof(struct bpf_link_info),
+			  "get-link-info(fd)",
+			  "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
+			  "prog_id %d (%d) tp_name %s(%s)\n",
+			  err, errno,
+			  info_len, sizeof(struct bpf_link_info),
+			  link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
+			  link_infos[i].id,
+			  link_infos[i].prog_id, prog_infos[i].id,
+			  (char *)link_infos[i].raw_tracepoint.tp_name,
+			  "sys_enter"))
+			goto done;
+
 	}
 
 	/* Check bpf_prog_get_next_id() */
@@ -247,7 +294,52 @@ void test_bpf_obj_id(void)
 	      "nr_id_found %u(%u)\n",
 	      nr_id_found, nr_iters);
 
+	/* Check bpf_link_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_link_get_next_id(next_id, &next_id)) {
+		struct bpf_link_info link_info;
+		int link_fd, cmp_res;
+
+		info_len = sizeof(link_info);
+		memset(&link_info, 0, info_len);
+
+		link_fd = bpf_link_get_fd_by_id(next_id);
+		if (link_fd < 0 && errno == ENOENT)
+			/* The bpf_link is in the dead row */
+			continue;
+		if (CHECK(link_fd < 0, "get-link-fd(next_id)",
+			  "link_fd %d next_id %u errno %d\n",
+			  link_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (link_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+		cmp_res = memcmp(&link_info, &link_infos[i],
+				offsetof(struct bpf_link_info, raw_tracepoint));
+		CHECK(err || info_len != sizeof(link_info) || cmp_res,
+		      "check get-link-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%zu) memcmp %d\n",
+		      err, errno, info_len, sizeof(struct bpf_link_info),
+		      cmp_res);
+
+		close(link_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total link id found by get_next_id",
+	      "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+
 done:
-	for (i = 0; i < nr_iters; i++)
+	for (i = 0; i < nr_iters; i++) {
+		bpf_link__destroy(links[i]);
 		bpf_object__close(objs[i]);
+	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
new file mode 100644
index 000000000000..f7ee8fa377ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+
+#include "test_btf_map_in_map.skel.h"
+
+void test_btf_map_in_map(void)
+{
+	int duration = 0, err, key = 0, val;
+	struct test_btf_map_in_map* skel;
+
+	skel = test_btf_map_in_map__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+		return;
+
+	err = test_btf_map_in_map__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* inner1 = input, inner2 = input + 1 */
+	val = bpf_map__fd(skel->maps.inner_map1);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+	val = bpf_map__fd(skel->maps.inner_map2);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+	skel->bss->input = 1;
+	usleep(1);
+
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+	CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+	CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+
+	/* inner1 = input + 1, inner2 = input */
+	val = bpf_map__fd(skel->maps.inner_map2);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+	val = bpf_map__fd(skel->maps.inner_map1);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+	skel->bss->input = 3;
+	usleep(1);
+
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+	CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+	CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+
+cleanup:
+	test_btf_map_in_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
new file mode 100644
index 000000000000..059047af7df3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "cgroup_skb_sk_lookup_kern.skel.h"
+
+static void run_lookup_test(__u16 *g_serv_port, int out_sk)
+{
+	int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
+	struct sockaddr_in6 addr = {};
+	socklen_t addr_len = sizeof(addr);
+	__u32 duration = 0;
+
+	serv_sk = start_server(AF_INET6, SOCK_STREAM);
+	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+		return;
+
+	err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
+	if (CHECK(err, "getsockname", "errno %d\n", errno))
+		goto cleanup;
+
+	*g_serv_port = addr.sin6_port;
+
+	/* Client outside of test cgroup should fail to connect by timeout. */
+	err = connect_fd_to_fd(out_sk, serv_sk);
+	if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
+		  "unexpected result err %d errno %d\n", err, errno))
+		goto cleanup;
+
+	err = connect_wait(out_sk);
+	if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
+		goto cleanup;
+
+	/* Client inside test cgroup should connect just fine. */
+	in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+	if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
+		goto cleanup;
+
+	serv_in_sk = accept(serv_sk, NULL, NULL);
+	if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
+		goto cleanup;
+
+cleanup:
+	close(serv_in_sk);
+	close(in_sk);
+	close(serv_sk);
+}
+
+static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
+{
+	struct cgroup_skb_sk_lookup_kern *skel;
+	struct bpf_link *link;
+	__u32 duration = 0;
+	int cgfd = -1;
+
+	skel = cgroup_skb_sk_lookup_kern__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+		return;
+
+	cgfd = test__join_cgroup(cg_path);
+	if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
+		goto cleanup;
+
+	link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
+	if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+
+	run_lookup_test(&skel->bss->g_serv_port, out_sk);
+
+	bpf_link__destroy(link);
+
+cleanup:
+	close(cgfd);
+	cgroup_skb_sk_lookup_kern__destroy(skel);
+}
+
+void test_cgroup_skb_sk_lookup(void)
+{
+	const char *cg_path = "/foo";
+	int out_sk;
+
+	/* Create a socket before joining testing cgroup so that its cgroup id
+	 * differs from that of testing cgroup. Moving selftests process to
+	 * testing cgroup won't change cgroup id of an already created socket.
+	 */
+	out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	if (CHECK_FAIL(out_sk < 0))
+		return;
+
+	run_cgroup_bpf_test(cg_path, out_sk);
+
+	close(out_sk);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
new file mode 100644
index 000000000000..f259085cca6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <linux/pkt_cls.h>
+
+#include <test_progs.h>
+
+#include "progs/test_cls_redirect.h"
+#include "test_cls_redirect.skel.h"
+
+#define ENCAP_IP INADDR_LOOPBACK
+#define ENCAP_PORT (1234)
+
+struct addr_port {
+	in_port_t port;
+	union {
+		struct in_addr in_addr;
+		struct in6_addr in6_addr;
+	};
+};
+
+struct tuple {
+	int family;
+	struct addr_port src;
+	struct addr_port dst;
+};
+
+static int start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+	int fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		return -1;
+	if (CHECK_FAIL(bind(fd, addr, len) == -1))
+		goto err;
+	if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+		goto err;
+
+	return fd;
+
+err:
+	close(fd);
+	return -1;
+}
+
+static int connect_to_server(const struct sockaddr *addr, socklen_t len,
+			     int type)
+{
+	int fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		return -1;
+	if (CHECK_FAIL(connect(fd, addr, len)))
+		goto err;
+
+	return fd;
+
+err:
+	close(fd);
+	return -1;
+}
+
+static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
+{
+	const struct sockaddr_in6 *in6;
+	const struct sockaddr_in *in;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		in = (const struct sockaddr_in *)sa;
+		ap->in_addr = in->sin_addr;
+		ap->port = in->sin_port;
+		return true;
+
+	case AF_INET6:
+		in6 = (const struct sockaddr_in6 *)sa;
+		ap->in6_addr = in6->sin6_addr;
+		ap->port = in6->sin6_port;
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
+			int *server, int *conn, struct tuple *tuple)
+{
+	struct sockaddr_storage ss;
+	socklen_t slen = sizeof(ss);
+	struct sockaddr *sa = (struct sockaddr *)&ss;
+
+	*server = start_server(addr, len, type);
+	if (*server < 0)
+		return false;
+
+	if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+		goto close_server;
+
+	*conn = connect_to_server(sa, slen, type);
+	if (*conn < 0)
+		goto close_server;
+
+	/* We want to simulate packets arriving at conn, so we have to
+	 * swap src and dst.
+	 */
+	slen = sizeof(ss);
+	if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
+		goto close_conn;
+
+	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+		goto close_conn;
+
+	slen = sizeof(ss);
+	if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+		goto close_conn;
+
+	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
+		goto close_conn;
+
+	tuple->family = ss.ss_family;
+	return true;
+
+close_conn:
+	close(*conn);
+	*conn = -1;
+close_server:
+	close(*server);
+	*server = -1;
+	return false;
+}
+
+static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
+{
+	struct sockaddr_in *addr4;
+	struct sockaddr_in6 *addr6;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = family;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		return sizeof(*addr4);
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)addr;
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = family;
+		addr6->sin6_addr = in6addr_loopback;
+		return sizeof(*addr6);
+	default:
+		fprintf(stderr, "Invalid family %d", family);
+		return 0;
+	}
+}
+
+static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+{
+	return tattr->data_size_out < tattr->data_size_in;
+}
+
+enum type {
+	UDP,
+	TCP,
+	__NR_KIND,
+};
+
+enum hops {
+	NO_HOPS,
+	ONE_HOP,
+};
+
+enum flags {
+	NONE,
+	SYN,
+	ACK,
+};
+
+enum conn {
+	KNOWN_CONN,
+	UNKNOWN_CONN,
+};
+
+enum result {
+	ACCEPT,
+	FORWARD,
+};
+
+struct test_cfg {
+	enum type type;
+	enum result result;
+	enum conn conn;
+	enum hops hops;
+	enum flags flags;
+};
+
+static int test_str(void *buf, size_t len, const struct test_cfg *test,
+		    int family)
+{
+	const char *family_str, *type, *conn, *hops, *result, *flags;
+
+	family_str = "IPv4";
+	if (family == AF_INET6)
+		family_str = "IPv6";
+
+	type = "TCP";
+	if (test->type == UDP)
+		type = "UDP";
+
+	conn = "known";
+	if (test->conn == UNKNOWN_CONN)
+		conn = "unknown";
+
+	hops = "no hops";
+	if (test->hops == ONE_HOP)
+		hops = "one hop";
+
+	result = "accept";
+	if (test->result == FORWARD)
+		result = "forward";
+
+	flags = "none";
+	if (test->flags == SYN)
+		flags = "SYN";
+	else if (test->flags == ACK)
+		flags = "ACK";
+
+	return snprintf(buf, len, "%s %s %s %s (%s, flags: %s)", family_str,
+			type, result, conn, hops, flags);
+}
+
+static struct test_cfg tests[] = {
+	{ TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, SYN },
+	{ TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, ACK },
+	{ TCP, FORWARD, UNKNOWN_CONN, ONE_HOP, ACK },
+	{ TCP, ACCEPT, KNOWN_CONN, ONE_HOP, ACK },
+	{ UDP, ACCEPT, UNKNOWN_CONN, NO_HOPS, NONE },
+	{ UDP, FORWARD, UNKNOWN_CONN, ONE_HOP, NONE },
+	{ UDP, ACCEPT, KNOWN_CONN, ONE_HOP, NONE },
+};
+
+static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
+{
+	const uint8_t hlen =
+		(sizeof(struct guehdr) / sizeof(uint32_t)) + hop_count;
+	*encap = (encap_headers_t){
+		.eth = { .h_proto = htons(ETH_P_IP) },
+		.ip = {
+			.ihl = 5,
+			.version = 4,
+			.ttl = IPDEFTTL,
+			.protocol = IPPROTO_UDP,
+			.daddr = htonl(ENCAP_IP)
+		},
+		.udp = {
+			.dest = htons(ENCAP_PORT),
+		},
+		.gue = {
+			.hlen = hlen,
+			.proto_ctype = proto
+		},
+		.unigue = {
+			.hop_count = hop_count
+		},
+	};
+}
+
+static size_t build_input(const struct test_cfg *test, void *const buf,
+			  const struct tuple *tuple)
+{
+	in_port_t sport = tuple->src.port;
+	encap_headers_t encap;
+	struct iphdr ip;
+	struct ipv6hdr ipv6;
+	struct tcphdr tcp;
+	struct udphdr udp;
+	struct in_addr next_hop;
+	uint8_t *p = buf;
+	int proto;
+
+	proto = IPPROTO_IPIP;
+	if (tuple->family == AF_INET6)
+		proto = IPPROTO_IPV6;
+
+	encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
+	p = mempcpy(p, &encap, sizeof(encap));
+
+	if (test->hops == ONE_HOP) {
+		next_hop = (struct in_addr){ .s_addr = htonl(0x7f000002) };
+		p = mempcpy(p, &next_hop, sizeof(next_hop));
+	}
+
+	proto = IPPROTO_TCP;
+	if (test->type == UDP)
+		proto = IPPROTO_UDP;
+
+	switch (tuple->family) {
+	case AF_INET:
+		ip = (struct iphdr){
+			.ihl = 5,
+			.version = 4,
+			.ttl = IPDEFTTL,
+			.protocol = proto,
+			.saddr = tuple->src.in_addr.s_addr,
+			.daddr = tuple->dst.in_addr.s_addr,
+		};
+		p = mempcpy(p, &ip, sizeof(ip));
+		break;
+	case AF_INET6:
+		ipv6 = (struct ipv6hdr){
+			.version = 6,
+			.hop_limit = IPDEFTTL,
+			.nexthdr = proto,
+			.saddr = tuple->src.in6_addr,
+			.daddr = tuple->dst.in6_addr,
+		};
+		p = mempcpy(p, &ipv6, sizeof(ipv6));
+		break;
+	default:
+		return 0;
+	}
+
+	if (test->conn == UNKNOWN_CONN)
+		sport--;
+
+	switch (test->type) {
+	case TCP:
+		tcp = (struct tcphdr){
+			.source = sport,
+			.dest = tuple->dst.port,
+		};
+		if (test->flags == SYN)
+			tcp.syn = true;
+		if (test->flags == ACK)
+			tcp.ack = true;
+		p = mempcpy(p, &tcp, sizeof(tcp));
+		break;
+	case UDP:
+		udp = (struct udphdr){
+			.source = sport,
+			.dest = tuple->dst.port,
+		};
+		p = mempcpy(p, &udp, sizeof(udp));
+		break;
+	default:
+		return 0;
+	}
+
+	return (void *)p - buf;
+}
+
+static void close_fds(int *fds, int n)
+{
+	int i;
+
+	for (i = 0; i < n; i++)
+		if (fds[i] > 0)
+			close(fds[i]);
+}
+
+void test_cls_redirect(void)
+{
+	struct test_cls_redirect *skel = NULL;
+	struct bpf_prog_test_run_attr tattr = {};
+	int families[] = { AF_INET, AF_INET6 };
+	struct sockaddr_storage ss;
+	struct sockaddr *addr;
+	socklen_t slen;
+	int i, j, err;
+
+	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
+	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
+	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+
+	skel = test_cls_redirect__open();
+	if (CHECK_FAIL(!skel))
+		return;
+
+	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+	if (CHECK_FAIL(test_cls_redirect__load(skel)))
+		goto cleanup;
+
+	addr = (struct sockaddr *)&ss;
+	for (i = 0; i < ARRAY_SIZE(families); i++) {
+		slen = prepare_addr(&ss, families[i]);
+		if (CHECK_FAIL(!slen))
+			goto cleanup;
+
+		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+					    &servers[UDP][i], &conns[UDP][i],
+					    &tuples[UDP][i])))
+			goto cleanup;
+
+		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+					    &servers[TCP][i], &conns[TCP][i],
+					    &tuples[TCP][i])))
+			goto cleanup;
+	}
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct test_cfg *test = &tests[i];
+
+		for (j = 0; j < ARRAY_SIZE(families); j++) {
+			struct tuple *tuple = &tuples[test->type][j];
+			char input[256];
+			char tmp[256];
+
+			test_str(tmp, sizeof(tmp), test, tuple->family);
+			if (!test__start_subtest(tmp))
+				continue;
+
+			tattr.data_out = tmp;
+			tattr.data_size_out = sizeof(tmp);
+
+			tattr.data_in = input;
+			tattr.data_size_in = build_input(test, input, tuple);
+			if (CHECK_FAIL(!tattr.data_size_in))
+				continue;
+
+			err = bpf_prog_test_run_xattr(&tattr);
+			if (CHECK_FAIL(err))
+				continue;
+
+			if (tattr.retval != TC_ACT_REDIRECT) {
+				PRINT_FAIL("expected TC_ACT_REDIRECT, got %d\n",
+					   tattr.retval);
+				continue;
+			}
+
+			switch (test->result) {
+			case ACCEPT:
+				if (CHECK_FAIL(!was_decapsulated(&tattr)))
+					continue;
+				break;
+			case FORWARD:
+				if (CHECK_FAIL(was_decapsulated(&tattr)))
+					continue;
+				break;
+			default:
+				PRINT_FAIL("unknown result %d\n", test->result);
+				continue;
+			}
+		}
+	}
+
+cleanup:
+	test_cls_redirect__destroy(skel);
+	close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
+	close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
new file mode 100644
index 000000000000..17bbf76812ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int verify_ports(int family, int fd,
+			__u16 expected_local, __u16 expected_peer)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	__u16 port;
+
+	if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		return -1;
+	}
+
+	if (family == AF_INET)
+		port = ((struct sockaddr_in *)&addr)->sin_port;
+	else
+		port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+	if (ntohs(port) != expected_local) {
+		log_err("Unexpected local port %d, expected %d", ntohs(port),
+			expected_local);
+		return -1;
+	}
+
+	if (getpeername(fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get peer addr");
+		return -1;
+	}
+
+	if (family == AF_INET)
+		port = ((struct sockaddr_in *)&addr)->sin_port;
+	else
+		port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+	if (ntohs(port) != expected_peer) {
+		log_err("Unexpected peer port %d, expected %d", ntohs(port),
+			expected_peer);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test(int cgroup_fd, int server_fd, int family, int type)
+{
+	bool v4 = family == AF_INET;
+	__u16 expected_local_port = v4 ? 22222 : 22223;
+	__u16 expected_peer_port = 60000;
+	struct bpf_prog_load_attr attr = {
+		.file = v4 ? "./connect_force_port4.o" :
+			     "./connect_force_port6.o",
+	};
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int xlate_fd, fd, err;
+	__u32 duration = 0;
+
+	err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/connect4" :
+						 "cgroup/connect6");
+	if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_CONNECT :
+			      BPF_CGROUP_INET6_CONNECT, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/getpeername4" :
+						 "cgroup/getpeername6");
+	if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_GETPEERNAME :
+			      BPF_CGROUP_INET6_GETPEERNAME, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/getsockname4" :
+						 "cgroup/getsockname6");
+	if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_GETSOCKNAME :
+			      BPF_CGROUP_INET6_GETSOCKNAME, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	fd = connect_to_fd(family, type, server_fd);
+	if (fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	err = verify_ports(family, fd, expected_local_port,
+			   expected_peer_port);
+	close(fd);
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+void test_connect_force_port(void)
+{
+	int server_fd, cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/connect_force_port");
+	if (CHECK_FAIL(cgroup_fd < 0))
+		return;
+
+	server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
+	close(server_fd);
+
+	server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
+	close(server_fd);
+
+	server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
+	close(server_fd);
+
+	server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
+	close(server_fd);
+
+close_cgroup_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 31e177adbdf1..084ed26a7d78 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -392,7 +392,7 @@ static struct core_reloc_test_case test_cases[] = {
 		.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
 			.a = 42,
 		},
-		.input_len = sizeof(struct core_reloc_existence),
+		.input_len = sizeof(struct core_reloc_existence___minimal),
 		.output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
 			.a_exists = 1,
 			.b_exists = 0,
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
new file mode 100644
index 000000000000..2cb2085917e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+	struct test_enable_stats *skel;
+	int stats_fd, err, prog_fd;
+	struct bpf_prog_info info;
+	__u32 info_len = sizeof(info);
+	int duration = 0;
+
+	skel = test_enable_stats__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+		test_enable_stats__destroy(skel);
+		return;
+	}
+
+	err = test_enable_stats__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	test_enable_stats__detach(skel);
+
+	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+	memset(&info, 0, info_len);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info",
+		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		goto cleanup;
+	if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+		  "failed to enable run_time_ns stats\n"))
+		goto cleanup;
+
+	CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+	      "invalid run_cnt stats\n");
+
+cleanup:
+	test_enable_stats__destroy(skel);
+	close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index c2642517e1d8..a895bfed55db 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_fexit_bpf2bpf_common(const char *obj_file,
 				      const char *target_obj_file,
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 92563898867c..ea14e3ece812 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,10 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 #include <error.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
 #include <sys/uio.h>
 
+#include "bpf_flow.skel.h"
+
 #ifndef IP_MF
 #define IP_MF 0x2000
 #endif
@@ -100,6 +103,7 @@ struct test {
 
 #define VLAN_HLEN	4
 
+static __u32 duration;
 struct test tests[] = {
 	{
 		.name = "ipv4",
@@ -443,17 +447,130 @@ static int ifup(const char *ifname)
 	return 0;
 }
 
+static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
+{
+	int i, err, map_fd, prog_fd;
+	struct bpf_program *prog;
+	char prog_name[32];
+
+	map_fd = bpf_map__fd(prog_array);
+	if (map_fd < 0)
+		return -1;
+
+	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+		snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		if (!prog)
+			return -1;
+
+		prog_fd = bpf_program__fd(prog);
+		if (prog_fd < 0)
+			return -1;
+
+		err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+		if (err)
+			return -1;
+	}
+	return 0;
+}
+
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+{
+	int i, err, keys_fd;
+
+	keys_fd = bpf_map__fd(keys);
+	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		/* Keep in sync with 'flags' from eth_get_headlen. */
+		__u32 eth_get_headlen_flags =
+			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
+		struct bpf_prog_test_run_attr tattr = {};
+		struct bpf_flow_keys flow_keys = {};
+		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
+			    tests[i].keys.dport;
+
+		/* For skb-less case we can't pass input flags; run
+		 * only the tests that have a matching set of flags.
+		 */
+
+		if (tests[i].flags != eth_get_headlen_flags)
+			continue;
+
+		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+
+		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+		err = bpf_map_delete_elem(keys_fd, &key);
+		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+	}
+}
+
+static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+{
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+		return;
+
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
+		return;
+
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+	err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
+	CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno);
+}
+
+static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+{
+	struct bpf_link *link;
+	int err, net_fd;
+
+	net_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+		return;
+
+	link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
+	if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+		goto out_close;
+
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+	err = bpf_link__destroy(link);
+	CHECK(err, "bpf_link__destroy", "err %d\n", err);
+out_close:
+	close(net_fd);
+}
+
 void test_flow_dissector(void)
 {
 	int i, err, prog_fd, keys_fd = -1, tap_fd;
-	struct bpf_object *obj;
-	__u32 duration = 0;
+	struct bpf_flow *skel;
 
-	err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
-			    "jmp_table", "last_dissection", &prog_fd, &keys_fd);
-	if (CHECK_FAIL(err))
+	skel = bpf_flow__open_and_load();
+	if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
 		return;
 
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+		goto out_destroy_skel;
+	keys_fd = bpf_map__fd(skel->maps.last_dissection);
+	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+		goto out_destroy_skel;
+	err = init_prog_array(skel->obj, skel->maps.jmp_table);
+	if (CHECK(err, "init_prog_array", "err %d\n", err))
+		goto out_destroy_skel;
+
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_flow_keys flow_keys;
 		struct bpf_prog_test_run_attr tattr = {
@@ -486,43 +603,17 @@ void test_flow_dissector(void)
 	 * via BPF map in this case.
 	 */
 
-	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
-	CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);
-
 	tap_fd = create_tap("tap0");
 	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
 	err = ifup("tap0");
 	CHECK(err, "ifup", "err %d errno %d\n", err, errno);
 
-	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		/* Keep in sync with 'flags' from eth_get_headlen. */
-		__u32 eth_get_headlen_flags =
-			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
-		struct bpf_prog_test_run_attr tattr = {};
-		struct bpf_flow_keys flow_keys = {};
-		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
-			    tests[i].keys.dport;
-
-		/* For skb-less case we can't pass input flags; run
-		 * only the tests that have a matching set of flags.
-		 */
-
-		if (tests[i].flags != eth_get_headlen_flags)
-			continue;
-
-		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
-		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
-
-		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
-
-		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
-		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
-
-		err = bpf_map_delete_elem(keys_fd, &key);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
-	}
+	/* Test direct prog attachment */
+	test_skb_less_prog_attach(skel, tap_fd);
+	/* Test indirect prog attachment via link */
+	test_skb_less_link_create(skel, tap_fd);
 
-	bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
-	bpf_object__close(obj);
+	close(tap_fd);
+out_destroy_skel:
+	bpf_flow__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index dc5ef155ec28..0e8a4d2f023d 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_flow_dissector_load_bytes(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 1f51ba66b98b..15cb554a66d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 #include <sched.h>
 #include <stdbool.h>
+#include <sys/stat.h>
 #include <unistd.h>
 
 #include <linux/bpf.h>
@@ -18,21 +19,30 @@
 
 #include "test_progs.h"
 
-static bool is_attached(int netns)
+static int init_net = -1;
+
+static __u32 query_attached_prog_id(int netns)
 {
-	__u32 cnt;
+	__u32 prog_ids[1] = {};
+	__u32 prog_cnt = ARRAY_SIZE(prog_ids);
 	int err;
 
-	err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+	err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL,
+			     prog_ids, &prog_cnt);
 	if (CHECK_FAIL(err)) {
 		perror("bpf_prog_query");
-		return true; /* fail-safe */
+		return 0;
 	}
 
-	return cnt > 0;
+	return prog_cnt == 1 ? prog_ids[0] : 0;
+}
+
+static bool prog_is_attached(int netns)
+{
+	return query_attached_prog_id(netns) > 0;
 }
 
-static int load_prog(void)
+static int load_prog(enum bpf_prog_type type)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
@@ -40,61 +50,566 @@ static int load_prog(void)
 	};
 	int fd;
 
-	fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
-			      ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+	fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 	if (CHECK_FAIL(fd < 0))
 		perror("bpf_load_program");
 
 	return fd;
 }
 
-static void do_flow_dissector_reattach(void)
+static __u32 query_prog_id(int prog)
 {
-	int prog_fd[2] = { -1, -1 };
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int err;
 
-	prog_fd[0] = load_prog();
-	if (prog_fd[0] < 0)
-		return;
+	err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+	if (CHECK_FAIL(err || info_len != sizeof(info))) {
+		perror("bpf_obj_get_info_by_fd");
+		return 0;
+	}
 
-	prog_fd[1] = load_prog();
-	if (prog_fd[1] < 0)
-		goto out_close;
+	return info.id;
+}
+
+static int unshare_net(int old_net)
+{
+	int err, new_net;
 
-	err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+	err = unshare(CLONE_NEWNET);
 	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach-0");
-		goto out_close;
+		perror("unshare(CLONE_NEWNET)");
+		return -1;
+	}
+	new_net = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK_FAIL(new_net < 0)) {
+		perror("open(/proc/self/ns/net)");
+		setns(old_net, CLONE_NEWNET);
+		return -1;
 	}
+	return new_net;
+}
+
+static void test_prog_attach_prog_attach(int netns, int prog1, int prog2)
+{
+	int err;
+
+	err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
 	/* Expect success when attaching a different program */
-	err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+	err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
 	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach-1");
+		perror("bpf_prog_attach(prog2) #1");
 		goto out_detach;
 	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
 
 	/* Expect failure when attaching the same program twice */
-	err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+	err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
 	if (CHECK_FAIL(!err || errno != EINVAL))
-		perror("bpf_prog_attach-2");
+		perror("bpf_prog_attach(prog2) #2");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
 
 out_detach:
 	err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
 	if (CHECK_FAIL(err))
 		perror("bpf_prog_detach");
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_link_create(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int link1, link2;
+
+	link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure creating link when another link exists */
+	errno = 0;
+	link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+		perror("bpf_prog_attach(prog2) expected E2BIG");
+	if (link2 != -1)
+		close(link2);
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link1);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_link_create(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure creating link when prog attached */
+	errno = 0;
+	link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link != -1 || errno != EEXIST))
+		perror("bpf_link_create(prog2) expected EEXIST");
+	if (link != -1)
+		close(link);
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(err))
+		perror("bpf_prog_detach");
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_attach(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure attaching prog when link exists */
+	errno = 0;
+	err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(!err || errno != EEXIST))
+		perror("bpf_prog_attach(prog2) expected EEXIST");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_detach(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure detaching prog when link exists */
+	errno = 0;
+	err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_prog_detach expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_detach_query(int netns, int prog1, int prog2)
+{
+	int err;
+
+	err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_detach");
+		return;
+	}
+
+	/* Expect no prog attached after successful detach */
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_close_query(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	/* Expect no prog attached after closing last link FD */
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_no_old_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect success replacing the prog when old prog not specified */
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err))
+		perror("bpf_link_update");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_replace_old_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
 
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect success F_REPLACE and old prog specified to succeed */
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = prog1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err))
+		perror("bpf_link_update");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_opts(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail w/ old prog FD but w/o F_REPLACE*/
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = prog1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL)) {
+		perror("bpf_link_update expected EINVAL");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail on old prog FD mismatch */
+	errno = 0;
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = prog2;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EPERM)) {
+		perror("bpf_link_update expected EPERM");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail for invalid old prog FD */
+	errno = 0;
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = -1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EBADF)) {
+		perror("bpf_link_update expected EBADF");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail with invalid flags */
+	errno = 0;
+	update_opts.flags = BPF_F_ALLOW_MULTI;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_link_update expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+out_close:
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link, prog3;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure when new prog FD is not valid */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, -1, &update_opts);
+	if (CHECK_FAIL(!err || errno != EBADF)) {
+		perror("bpf_link_update expected EINVAL");
+		goto out_close_link;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER);
+	if (prog3 < 0)
+		goto out_close_link;
+
+	/* Expect failure when new prog FD type doesn't match */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog3, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_link_update expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(prog3);
+out_close_link:
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_netns_gone(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link, old_net;
+
+	old_net = netns;
+	netns = unshare_net(old_net);
+	if (netns < 0)
+		return;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(netns);
+	err = setns(old_net, CLONE_NEWNET);
+	if (CHECK_FAIL(err)) {
+		perror("setns(CLONE_NEWNET)");
+		close(link);
+		return;
+	}
+
+	/* Expect failure when netns destroyed */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != ENOLINK))
+		perror("bpf_link_update");
+
+	close(link);
+}
+
+static void test_link_get_info(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	struct bpf_link_info info = {};
+	struct stat netns_stat = {};
+	__u32 info_len, link_id;
+	int err, link, old_net;
+
+	old_net = netns;
+	netns = unshare_net(old_net);
+	if (netns < 0)
+		return;
+
+	err = fstat(netns, &netns_stat);
+	if (CHECK_FAIL(err)) {
+		perror("stat(netns)");
+		goto out_resetns;
+	}
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		goto out_resetns;
+	}
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect link info to be sane and match prog and netns details */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id == 0);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog1));
+	CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_link_update(prog2)");
+		goto out_unlink;
+	}
+
+	link_id = info.id;
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect no info change after update except in prog id */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id != link_id);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+	CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+	/* Leave netns link is attached to and close last FD to it */
+	err = setns(old_net, CLONE_NEWNET);
+	if (CHECK_FAIL(err)) {
+		perror("setns(NEWNET)");
+		goto out_unlink;
+	}
+	close(netns);
+	old_net = -1;
+	netns = -1;
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect netns_ino to change to 0 */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id != link_id);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+	CHECK_FAIL(info.netns.netns_ino != 0);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+out_unlink:
+	close(link);
+out_resetns:
+	if (old_net != -1)
+		setns(old_net, CLONE_NEWNET);
+	if (netns != -1)
+		close(netns);
+}
+
+static void run_tests(int netns)
+{
+	struct test {
+		const char *test_name;
+		void (*test_func)(int netns, int prog1, int prog2);
+	} tests[] = {
+		{ "prog attach, prog attach",
+		  test_prog_attach_prog_attach },
+		{ "link create, link create",
+		  test_link_create_link_create },
+		{ "prog attach, link create",
+		  test_prog_attach_link_create },
+		{ "link create, prog attach",
+		  test_link_create_prog_attach },
+		{ "link create, prog detach",
+		  test_link_create_prog_detach },
+		{ "prog attach, detach, query",
+		  test_prog_attach_detach_query },
+		{ "link create, close, query",
+		  test_link_create_close_query },
+		{ "link update no old prog",
+		  test_link_update_no_old_prog },
+		{ "link update with replace old prog",
+		  test_link_update_replace_old_prog },
+		{ "link update invalid opts",
+		  test_link_update_invalid_opts },
+		{ "link update invalid prog",
+		  test_link_update_invalid_prog },
+		{ "link update netns gone",
+		  test_link_update_netns_gone },
+		{ "link get info",
+		  test_link_get_info },
+	};
+	int i, progs[2] = { -1, -1 };
+	char test_name[80];
+
+	for (i = 0; i < ARRAY_SIZE(progs); i++) {
+		progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR);
+		if (progs[i] < 0)
+			goto out_close;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		snprintf(test_name, sizeof(test_name),
+			 "flow dissector %s%s",
+			 tests[i].test_name,
+			 netns == init_net ? " (init_net)" : "");
+		if (test__start_subtest(test_name))
+			tests[i].test_func(netns, progs[0], progs[1]);
+	}
 out_close:
-	close(prog_fd[1]);
-	close(prog_fd[0]);
+	for (i = 0; i < ARRAY_SIZE(progs); i++) {
+		if (progs[i] != -1)
+			CHECK_FAIL(close(progs[i]));
+	}
 }
 
 void test_flow_dissector_reattach(void)
 {
-	int init_net, self_net, err;
+	int err, new_net, saved_net;
 
-	self_net = open("/proc/self/ns/net", O_RDONLY);
-	if (CHECK_FAIL(self_net < 0)) {
+	saved_net = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK_FAIL(saved_net < 0)) {
 		perror("open(/proc/self/ns/net");
 		return;
 	}
@@ -111,30 +626,29 @@ void test_flow_dissector_reattach(void)
 		goto out_close;
 	}
 
-	if (is_attached(init_net)) {
+	if (prog_is_attached(init_net)) {
 		test__skip();
 		printf("Can't test with flow dissector attached to init_net\n");
 		goto out_setns;
 	}
 
 	/* First run tests in root network namespace */
-	do_flow_dissector_reattach();
+	run_tests(init_net);
 
 	/* Then repeat tests in a non-root namespace */
-	err = unshare(CLONE_NEWNET);
-	if (CHECK_FAIL(err)) {
-		perror("unshare(CLONE_NEWNET)");
+	new_net = unshare_net(init_net);
+	if (new_net < 0)
 		goto out_setns;
-	}
-	do_flow_dissector_reattach();
+	run_tests(new_net);
+	close(new_net);
 
 out_setns:
 	/* Move back to netns we started in. */
-	err = setns(self_net, CLONE_NEWNET);
+	err = setns(saved_net, CLONE_NEWNET);
 	if (CHECK_FAIL(err))
 		perror("setns(/proc/self/ns/net)");
 
 out_close:
 	close(init_net);
-	close(self_net);
+	close(saved_net);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index c680926fce73..e3cb62b0a110 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 {
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index c490e012c23f..428d488830c6 100644
--- a/tools/testing/selftests/bpf/test_hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -5,26 +5,17 @@
  *
  * Copyright (c) 2019 Facebook
  */
-#include <stdio.h>
-#include <errno.h>
-#include <linux/err.h>
+#include "test_progs.h"
 #include "bpf/hashmap.h"
 
-#define CHECK(condition, format...) ({					\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
-		fprintf(stderr, format);				\
-	}								\
-	__ret;								\
-})
+static int duration = 0;
 
-size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(const void *k, void *ctx)
 {
 	return (long)k;
 }
 
-bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(const void *a, const void *b, void *ctx)
 {
 	return (long)a == (long)b;
 }
@@ -49,53 +40,55 @@ static inline size_t exp_cap(size_t sz)
 
 #define ELEM_CNT 62
 
-int test_hashmap_generic(void)
+static void test_hashmap_generic(void)
 {
 	struct hashmap_entry *entry, *tmp;
 	int err, bkt, found_cnt, i;
 	long long found_msk;
 	struct hashmap *map;
 
-	fprintf(stderr, "%s: ", __func__);
-
 	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		return;
 
 	for (i = 0; i < ELEM_CNT; i++) {
 		const void *oldk, *k = (const void *)(long)i;
 		void *oldv, *v = (void *)(long)(1024 + i);
 
 		err = hashmap__update(map, k, v, &oldk, &oldv);
-		if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
-			return 1;
+		if (CHECK(err != -ENOENT, "hashmap__update",
+			  "unexpected result: %d\n", err))
+			goto cleanup;
 
 		if (i % 2) {
 			err = hashmap__add(map, k, v);
 		} else {
 			err = hashmap__set(map, k, v, &oldk, &oldv);
-			if (CHECK(oldk != NULL || oldv != NULL,
+			if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
 				  "unexpected k/v: %p=%p\n", oldk, oldv))
-				return 1;
+				goto cleanup;
 		}
 
-		if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
+		if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
 			       (long)k, (long)v, err))
-			return 1;
+			goto cleanup;
 
-		if (CHECK(!hashmap__find(map, k, &oldv),
+		if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
 			  "failed to find key %ld\n", (long)k))
-			return 1;
-		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
-			return 1;
+			goto cleanup;
+		if (CHECK(oldv != v, "elem_val",
+			  "found value is wrong: %ld\n", (long)oldv))
+			goto cleanup;
 	}
 
-	if (CHECK(hashmap__size(map) != ELEM_CNT,
+	if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
 		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
+		goto cleanup;
 	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap_cap",
 		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
+		goto cleanup;
 
 	found_msk = 0;
 	hashmap__for_each_entry(map, entry, bkt) {
@@ -103,42 +96,47 @@ int test_hashmap_generic(void)
 		long v = (long)entry->value;
 
 		found_msk |= 1ULL << k;
-		if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
-			return 1;
+		if (CHECK(v - k != 1024, "check_kv",
+			  "invalid k/v pair: %ld = %ld\n", k, v))
+			goto cleanup;
 	}
-	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
 		  "not all keys iterated: %llx\n", found_msk))
-		return 1;
+		goto cleanup;
 
 	for (i = 0; i < ELEM_CNT; i++) {
 		const void *oldk, *k = (const void *)(long)i;
 		void *oldv, *v = (void *)(long)(256 + i);
 
 		err = hashmap__add(map, k, v);
-		if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
-			return 1;
+		if (CHECK(err != -EEXIST, "hashmap__add",
+			  "unexpected add result: %d\n", err))
+			goto cleanup;
 
 		if (i % 2)
 			err = hashmap__update(map, k, v, &oldk, &oldv);
 		else
 			err = hashmap__set(map, k, v, &oldk, &oldv);
 
-		if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
-			       (long)k, (long)v, err))
-			return 1;
-		if (CHECK(!hashmap__find(map, k, &oldv),
+		if (CHECK(err, "elem_upd",
+			  "failed to update k/v %ld = %ld: %d\n",
+			  (long)k, (long)v, err))
+			goto cleanup;
+		if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
 			  "failed to find key %ld\n", (long)k))
-			return 1;
-		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
-			return 1;
+			goto cleanup;
+		if (CHECK(oldv != v, "elem_val",
+			  "found value is wrong: %ld\n", (long)oldv))
+			goto cleanup;
 	}
 
-	if (CHECK(hashmap__size(map) != ELEM_CNT,
+	if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
 		  "invalid updated map size: %zu\n", hashmap__size(map)))
-		return 1;
+		goto cleanup;
 	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap__capacity",
 		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
+		goto cleanup;
 
 	found_msk = 0;
 	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
@@ -146,20 +144,21 @@ int test_hashmap_generic(void)
 		long v = (long)entry->value;
 
 		found_msk |= 1ULL << k;
-		if (CHECK(v - k != 256,
+		if (CHECK(v - k != 256, "elem_check",
 			  "invalid updated k/v pair: %ld = %ld\n", k, v))
-			return 1;
+			goto cleanup;
 	}
-	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
 		  "not all keys iterated after update: %llx\n", found_msk))
-		return 1;
+		goto cleanup;
 
 	found_cnt = 0;
 	hashmap__for_each_key_entry(map, entry, (void *)0) {
 		found_cnt++;
 	}
-	if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
-		return 1;
+	if (CHECK(!found_cnt, "found_cnt",
+		  "didn't find any entries for key 0\n"))
+		goto cleanup;
 
 	found_msk = 0;
 	found_cnt = 0;
@@ -173,30 +172,31 @@ int test_hashmap_generic(void)
 		found_cnt++;
 		found_msk |= 1ULL << (long)k;
 
-		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
 			  "failed to delete k/v %ld = %ld\n",
 			  (long)k, (long)v))
-			return 1;
-		if (CHECK(oldk != k || oldv != v,
+			goto cleanup;
+		if (CHECK(oldk != k || oldv != v, "check_old",
 			  "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
 			  (long)k, (long)v, (long)oldk, (long)oldv))
-			return 1;
-		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			goto cleanup;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
 			  "unexpectedly deleted k/v %ld = %ld\n",
 			  (long)oldk, (long)oldv))
-			return 1;
+			goto cleanup;
 	}
 
-	if (CHECK(!found_cnt || !found_msk,
+	if (CHECK(!found_cnt || !found_msk, "found_entries",
 		  "didn't delete any key entries\n"))
-		return 1;
-	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
+		goto cleanup;
+	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, "elem_cnt",
 		  "invalid updated map size (already deleted: %d): %zu\n",
 		  found_cnt, hashmap__size(map)))
-		return 1;
+		goto cleanup;
 	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap__capacity",
 		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
+		goto cleanup;
 
 	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
 		const void *oldk, *k;
@@ -208,53 +208,56 @@ int test_hashmap_generic(void)
 		found_cnt++;
 		found_msk |= 1ULL << (long)k;
 
-		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
 			  "failed to delete k/v %ld = %ld\n",
 			  (long)k, (long)v))
-			return 1;
-		if (CHECK(oldk != k || oldv != v,
+			goto cleanup;
+		if (CHECK(oldk != k || oldv != v, "elem_check",
 			  "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
 			  (long)k, (long)v, (long)oldk, (long)oldv))
-			return 1;
-		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+			goto cleanup;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
 			  "unexpectedly deleted k/v %ld = %ld\n",
 			  (long)k, (long)v))
-			return 1;
+			goto cleanup;
 	}
 
 	if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+		  "found_cnt",
 		  "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
 		  found_cnt, found_msk))
-		return 1;
-	if (CHECK(hashmap__size(map) != 0,
+		goto cleanup;
+	if (CHECK(hashmap__size(map) != 0, "hashmap__size",
 		  "invalid updated map size (already deleted: %d): %zu\n",
 		  found_cnt, hashmap__size(map)))
-		return 1;
+		goto cleanup;
 
 	found_cnt = 0;
 	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected map entries left: %ld = %ld\n",
-			     (long)entry->key, (long)entry->value);
-		return 1;
+		CHECK(false, "elem_exists",
+		      "unexpected map entries left: %ld = %ld\n",
+		      (long)entry->key, (long)entry->value);
+		goto cleanup;
 	}
 
-	hashmap__free(map);
+	hashmap__clear(map);
 	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected map entries left: %ld = %ld\n",
-			     (long)entry->key, (long)entry->value);
-		return 1;
+		CHECK(false, "elem_exists",
+		      "unexpected map entries left: %ld = %ld\n",
+		      (long)entry->key, (long)entry->value);
+		goto cleanup;
 	}
 
-	fprintf(stderr, "OK\n");
-	return 0;
+cleanup:
+	hashmap__free(map);
 }
 
-size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(const void *k, void *ctx)
 {
 	return 0;
 }
 
-int test_hashmap_multimap(void)
+static void test_hashmap_multimap(void)
 {
 	void *k1 = (void *)0, *k2 = (void *)1;
 	struct hashmap_entry *entry;
@@ -262,121 +265,116 @@ int test_hashmap_multimap(void)
 	long found_msk;
 	int err, bkt;
 
-	fprintf(stderr, "%s: ", __func__);
-
 	/* force collisions */
 	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
-
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		return;
 
 	/* set up multimap:
 	 * [0] -> 1, 2, 4;
 	 * [1] -> 8, 16, 32;
 	 */
 	err = hashmap__append(map, k1, (void *)1);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 	err = hashmap__append(map, k1, (void *)2);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 	err = hashmap__append(map, k1, (void *)4);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 
 	err = hashmap__append(map, k2, (void *)8);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 	err = hashmap__append(map, k2, (void *)16);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 	err = hashmap__append(map, k2, (void *)32);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
 
-	if (CHECK(hashmap__size(map) != 6,
+	if (CHECK(hashmap__size(map) != 6, "hashmap_size",
 		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
+		goto cleanup;
 
 	/* verify global iteration still works and sees all values */
 	found_msk = 0;
 	hashmap__for_each_entry(map, entry, bkt) {
 		found_msk |= (long)entry->value;
 	}
-	if (CHECK(found_msk != (1 << 6) - 1,
+	if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
 		  "not all keys iterated: %lx\n", found_msk))
-		return 1;
+		goto cleanup;
 
 	/* iterate values for key 1 */
 	found_msk = 0;
 	hashmap__for_each_key_entry(map, entry, k1) {
 		found_msk |= (long)entry->value;
 	}
-	if (CHECK(found_msk != (1 | 2 | 4),
+	if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
 		  "invalid k1 values: %lx\n", found_msk))
-		return 1;
+		goto cleanup;
 
 	/* iterate values for key 2 */
 	found_msk = 0;
 	hashmap__for_each_key_entry(map, entry, k2) {
 		found_msk |= (long)entry->value;
 	}
-	if (CHECK(found_msk != (8 | 16 | 32),
+	if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
 		  "invalid k2 values: %lx\n", found_msk))
-		return 1;
+		goto cleanup;
 
-	fprintf(stderr, "OK\n");
-	return 0;
+cleanup:
+	hashmap__free(map);
 }
 
-int test_hashmap_empty()
+static void test_hashmap_empty()
 {
 	struct hashmap_entry *entry;
 	int bkt;
 	struct hashmap *map;
 	void *k = (void *)0;
 
-	fprintf(stderr, "%s: ", __func__);
-
 	/* force collisions */
 	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		goto cleanup;
 
-	if (CHECK(hashmap__size(map) != 0,
+	if (CHECK(hashmap__size(map) != 0, "hashmap__size",
 		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
-	if (CHECK(hashmap__capacity(map) != 0,
+		goto cleanup;
+	if (CHECK(hashmap__capacity(map) != 0, "hashmap__capacity",
 		  "invalid map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
-	if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
-		return 1;
-	if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
-		return 1;
+		goto cleanup;
+	if (CHECK(hashmap__find(map, k, NULL), "elem_find",
+		  "unexpected find\n"))
+		goto cleanup;
+	if (CHECK(hashmap__delete(map, k, NULL, NULL), "elem_del",
+		  "unexpected delete\n"))
+		goto cleanup;
 
 	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected iterated entry\n");
-		return 1;
+		CHECK(false, "elem_found", "unexpected iterated entry\n");
+		goto cleanup;
 	}
 	hashmap__for_each_key_entry(map, entry, k) {
-		CHECK(false, "unexpected key entry\n");
-		return 1;
+		CHECK(false, "key_found", "unexpected key entry\n");
+		goto cleanup;
 	}
 
-	fprintf(stderr, "OK\n");
-	return 0;
+cleanup:
+	hashmap__free(map);
 }
 
-int main(int argc, char **argv)
+void test_hashmap()
 {
-	bool failed = false;
-
-	if (test_hashmap_generic())
-		failed = true;
-	if (test_hashmap_multimap())
-		failed = true;
-	if (test_hashmap_empty())
-		failed = true;
-
-	return failed;
+	if (test__start_subtest("generic"))
+		test_hashmap_generic();
+	if (test__start_subtest("multimap"))
+		test_hashmap_multimap();
+	if (test__start_subtest("empty"))
+		test_hashmap_empty();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 7507c8f689bc..42c3a3103c26 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 struct meta {
 	int ifindex;
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index eaf64595be88..c2d373e294bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_l4lb(const char *file)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 8f91f1881d11..ce17b1ed8709 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -1,5 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+	__u32 duration, retval;
+	int err, prog_fd = *(u32 *) arg;
+
+	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	pthread_exit(arg);
+}
 
 static void *parallel_map_access(void *arg)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 6b9dce431d41..43d0b5578f46 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -19,7 +19,7 @@ void test_mmap(void)
 	const size_t map_sz = roundup_page(sizeof(struct map_data));
 	const int zero = 0, one = 1, two = 2, far = 1500;
 	const long page_size = sysconf(_SC_PAGE_SIZE);
-	int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd;
+	int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
 	struct bpf_map *data_map, *bss_map;
 	void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
 	struct test_mmap__bss *bss_data;
@@ -37,6 +37,17 @@ void test_mmap(void)
 	data_map = skel->maps.data_map;
 	data_map_fd = bpf_map__fd(data_map);
 
+	rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
+	tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+	if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
+		munmap(tmp1, 4096);
+		goto cleanup;
+	}
+	/* now double-check if it's mmap()'able at all */
+	tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+	if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
+		goto cleanup;
+
 	/* get map's ID */
 	memset(&map_info, 0, map_info_sz);
 	err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 542240e16564..e74dc501b27f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -80,9 +80,6 @@ void test_ns_current_pid_tgid(void)
 		  "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
 		goto cleanup;
 cleanup:
-	if (!link) {
-		bpf_link__destroy(link);
-		link = NULL;
-	}
+	bpf_link__destroy(link);
 	bpf_object__close(obj);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 1450ea2dd4cc..a122ce3b360e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -6,6 +6,11 @@
 #include <test_progs.h>
 #include "bpf/libbpf_internal.h"
 
+/* AddressSanitizer sometimes crashes due to data dereference below, due to
+ * this being mmap()'ed memory. Disable instrumentation with
+ * no_sanitize_address attribute
+ */
+__attribute__((no_sanitize_address))
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
 	int cpu_data = *(int *)data, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index a2537dfa899c..44b514fabccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_pkt_access(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 5f7aea605019..939015cd6dba 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_pkt_md_access(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 5dd89b941f53..dde2b7ae7bc9 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_prog_run_xattr(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index faccc66f4e39..f47e7b1cb32c 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 enum {
 	QUEUE,
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
new file mode 100644
index 000000000000..2bba908dfa63
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include "test_ringbuf.skel.h"
+
+#define EDONE 7777
+
+static int duration = 0;
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+static volatile int sample_cnt;
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	struct sample *s = data;
+
+	sample_cnt++;
+
+	switch (s->seq) {
+	case 0:
+		CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+		      333L, s->value);
+		return 0;
+	case 1:
+		CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+		      777L, s->value);
+		return -EDONE;
+	default:
+		/* we don't care about the rest */
+		return 0;
+	}
+}
+
+static struct test_ringbuf *skel;
+static struct ring_buffer *ringbuf;
+
+static void trigger_samples()
+{
+	skel->bss->dropped = 0;
+	skel->bss->total = 0;
+	skel->bss->discarded = 0;
+
+	/* trigger exactly two samples */
+	skel->bss->value = 333;
+	syscall(__NR_getpgid);
+	skel->bss->value = 777;
+	syscall(__NR_getpgid);
+}
+
+static void *poll_thread(void *input)
+{
+	long timeout = (long)input;
+
+	return (void *)(long)ring_buffer__poll(ringbuf, timeout);
+}
+
+void test_ringbuf(void)
+{
+	const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
+	pthread_t thread;
+	long bg_ret = -1;
+	int err;
+
+	skel = test_ringbuf__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+		return;
+
+	/* only trigger BPF program for current process */
+	skel->bss->pid = getpid();
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+				   process_sample, NULL, NULL);
+	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+		goto cleanup;
+
+	err = test_ringbuf__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+		goto cleanup;
+
+	trigger_samples();
+
+	/* 2 submitted + 1 discarded records */
+	CHECK(skel->bss->avail_data != 3 * rec_sz,
+	      "err_avail_size", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->avail_data);
+	CHECK(skel->bss->ring_size != 4096,
+	      "err_ring_size", "exp %ld, got %ld\n",
+	      4096L, skel->bss->ring_size);
+	CHECK(skel->bss->cons_pos != 0,
+	      "err_cons_pos", "exp %ld, got %ld\n",
+	      0L, skel->bss->cons_pos);
+	CHECK(skel->bss->prod_pos != 3 * rec_sz,
+	      "err_prod_pos", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->prod_pos);
+
+	/* poll for samples */
+	err = ring_buffer__poll(ringbuf, -1);
+
+	/* -EDONE is used as an indicator that we are done */
+	if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err))
+		goto cleanup;
+
+	/* we expect extra polling to return nothing */
+	err = ring_buffer__poll(ringbuf, 0);
+	if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err))
+		goto cleanup;
+
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	/* now validate consumer position is updated and returned */
+	trigger_samples();
+	CHECK(skel->bss->cons_pos != 3 * rec_sz,
+	      "err_cons_pos", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->cons_pos);
+	err = ring_buffer__poll(ringbuf, -1);
+	CHECK(err <= 0, "poll_err", "err %d\n", err);
+
+	/* start poll in background w/ long timeout */
+	err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000);
+	if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err))
+		goto cleanup;
+
+	/* turn off notifications now */
+	skel->bss->flags = BPF_RB_NO_WAKEUP;
+
+	/* give background thread a bit of a time */
+	usleep(50000);
+	trigger_samples();
+	/* sleeping arbitrarily is bad, but no better way to know that
+	 * epoll_wait() **DID NOT** unblock in background thread
+	 */
+	usleep(50000);
+	/* background poll should still be blocked */
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+		goto cleanup;
+
+	/* BPF side did everything right */
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	/* clear flags to return to "adaptive" notification mode */
+	skel->bss->flags = 0;
+
+	/* produce new samples, no notification should be triggered, because
+	 * consumer is now behind
+	 */
+	trigger_samples();
+
+	/* background poll should still be blocked */
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+		goto cleanup;
+
+	/* now force notifications */
+	skel->bss->flags = BPF_RB_FORCE_WAKEUP;
+	sample_cnt = 0;
+	trigger_samples();
+
+	/* now we should get a pending notification */
+	usleep(50000);
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err, "join_bg", "err %d\n", err))
+		goto cleanup;
+
+	if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+		goto cleanup;
+
+	/* 3 rounds, 2 samples each */
+	CHECK(sample_cnt != 6, "wrong_sample_cnt",
+	      "expected to see %d samples, got %d\n", 6, sample_cnt);
+
+	/* BPF side did everything right */
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	test_ringbuf__detach(skel);
+cleanup:
+	ring_buffer__free(ringbuf);
+	test_ringbuf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
new file mode 100644
index 000000000000..78e450609803
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/epoll.h>
+#include "test_ringbuf_multi.skel.h"
+
+static int duration = 0;
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	int ring = (unsigned long)ctx;
+	struct sample *s = data;
+
+	switch (s->seq) {
+	case 0:
+		CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring);
+		CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+		      333L, s->value);
+		break;
+	case 1:
+		CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring);
+		CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+		      777L, s->value);
+		break;
+	default:
+		CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n",
+		      s->seq, s->value);
+		return -1;
+	}
+
+	return 0;
+}
+
+void test_ringbuf_multi(void)
+{
+	struct test_ringbuf_multi *skel;
+	struct ring_buffer *ringbuf;
+	int err;
+
+	skel = test_ringbuf_multi__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+		return;
+
+	/* only trigger BPF program for current process */
+	skel->bss->pid = getpid();
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1),
+				   process_sample, (void *)(long)1, NULL);
+	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+		goto cleanup;
+
+	err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
+			      process_sample, (void *)(long)2);
+	if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
+		goto cleanup;
+
+	err = test_ringbuf_multi__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger few samples, some will be skipped */
+	skel->bss->target_ring = 0;
+	skel->bss->value = 333;
+	syscall(__NR_getpgid);
+
+	/* skipped, no ringbuf in slot 1 */
+	skel->bss->target_ring = 1;
+	skel->bss->value = 555;
+	syscall(__NR_getpgid);
+
+	skel->bss->target_ring = 2;
+	skel->bss->value = 777;
+	syscall(__NR_getpgid);
+
+	/* poll for samples, should get 2 ringbufs back */
+	err = ring_buffer__poll(ringbuf, -1);
+	if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+		goto cleanup;
+
+	/* expect extra polling to return nothing */
+	err = ring_buffer__poll(ringbuf, 0);
+	if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err))
+		goto cleanup;
+
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n",
+	      1L, skel->bss->skipped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+
+cleanup:
+	ring_buffer__free(ringbuf);
+	test_ringbuf_multi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index 996e808f43a2..dfcbddcbe4d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void sigalrm_handler(int s) {}
 static struct sigaction sigalrm_action = {
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index d572e1a2c297..47fa04adc147 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -20,6 +20,7 @@
 #define CONNECT_PORT 4321
 #define TEST_DADDR (0xC0A80203)
 #define NS_SELF "/proc/self/ns/net"
+#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
 
 static const struct timeval timeo_sec = { .tv_sec = 3 };
 static const size_t timeo_optlen = sizeof(timeo_sec);
@@ -265,6 +266,7 @@ void test_sk_assign(void)
 		TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
 	};
 	int server = -1;
+	int server_map;
 	int self_net;
 
 	self_net = open(NS_SELF, O_RDONLY);
@@ -278,9 +280,17 @@ void test_sk_assign(void)
 		goto cleanup;
 	}
 
+	server_map = bpf_obj_get(SERVER_MAP_PATH);
+	if (CHECK_FAIL(server_map < 0)) {
+		perror("Unable to open " SERVER_MAP_PATH);
+		goto cleanup;
+	}
+
 	for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
 		struct test_sk_cfg *test = &tests[i];
 		const struct sockaddr *addr;
+		const int zero = 0;
+		int err;
 
 		if (!test__start_subtest(test->name))
 			continue;
@@ -288,7 +298,13 @@ void test_sk_assign(void)
 		addr = (const struct sockaddr *)test->addr;
 		server = start_server(addr, test->len, test->type);
 		if (server == -1)
-			goto cleanup;
+			goto close;
+
+		err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY);
+		if (CHECK_FAIL(err)) {
+			perror("Unable to update server_map");
+			goto close;
+		}
 
 		/* connect to unbound ports */
 		prepare_addr(test->addr, test->family, CONNECT_PORT,
@@ -302,7 +318,10 @@ void test_sk_assign(void)
 
 close:
 	close(server);
+	close(server_map);
 cleanup:
+	if (CHECK_FAIL(unlink(SERVER_MAP_PATH)))
+		perror("Unable to unlink " SERVER_MAP_PATH);
 	if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
 		perror("Failed to setns("NS_SELF")");
 	close(self_net);
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 4538bd08203f..7021b92af313 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_skb_ctx(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
new file mode 100644
index 000000000000..f302ad84a298
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_skb_helpers(void)
+{
+	struct __sk_buff skb = {
+		.wire_len = 100,
+		.gso_segs = 8,
+		.gso_size = 10,
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+		.ctx_out = &skb,
+		.ctx_size_out = sizeof(skb),
+	};
+	struct bpf_object *obj;
+	int err;
+
+	err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+			    &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index aa43e0bd210c..96e7b7f84c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Cloudflare
+#include <error.h>
 
 #include "test_progs.h"
+#include "test_skmsg_load_helpers.skel.h"
 
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
 
@@ -70,10 +72,43 @@ out:
 	close(s);
 }
 
+static void test_skmsg_helpers(enum bpf_map_type map_type)
+{
+	struct test_skmsg_load_helpers *skel;
+	int err, map, verdict;
+
+	skel = test_skmsg_load_helpers__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_skmsg_load_helpers__open_and_load");
+		return;
+	}
+
+	verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+	map = bpf_map__fd(skel->maps.sock_map);
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach");
+		goto out;
+	}
+
+	err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_detach2");
+		goto out;
+	}
+out:
+	test_skmsg_load_helpers__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap sk_msg load helpers"))
+		test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
+	if (test__start_subtest("sockhash sk_msg load helpers"))
+		test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 1ae00cd3174e..7577a77a4c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -1,5 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+	__u32 duration, retval;
+	int err, prog_fd = *(u32 *) arg;
+
+	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	pthread_exit(arg);
+}
 
 void test_spinlock(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index e56b52ab41da..9013a0c01eed 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include "cgroup_helpers.h"
+#include "network_helpers.h"
 
 struct tcp_rtt_storage {
 	__u32 invoked;
@@ -87,34 +88,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
 	return err;
 }
 
-static int connect_to_server(int server_fd)
-{
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int fd;
-
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	if (fd < 0) {
-		log_err("Failed to create client socket");
-		return -1;
-	}
-
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
-		log_err("Failed to get server addr");
-		goto out;
-	}
-
-	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
-		log_err("Fail to connect to server");
-		goto out;
-	}
-
-	return fd;
-
-out:
-	close(fd);
-	return -1;
-}
 
 static int run_test(int cgroup_fd, int server_fd)
 {
@@ -145,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
 		goto close_bpf_object;
 	}
 
-	client_fd = connect_to_server(server_fd);
+	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
 	if (client_fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -180,103 +153,22 @@ close_bpf_object:
 	return err;
 }
 
-static int start_server(void)
-{
-	struct sockaddr_in addr = {
-		.sin_family = AF_INET,
-		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
-	};
-	int fd;
-
-	fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
-	if (fd < 0) {
-		log_err("Failed to create server socket");
-		return -1;
-	}
-
-	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
-		log_err("Failed to bind socket");
-		close(fd);
-		return -1;
-	}
-
-	return fd;
-}
-
-static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
-static volatile bool server_done = false;
-
-static void *server_thread(void *arg)
-{
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int fd = *(int *)arg;
-	int client_fd;
-	int err;
-
-	err = listen(fd, 1);
-
-	pthread_mutex_lock(&server_started_mtx);
-	pthread_cond_signal(&server_started);
-	pthread_mutex_unlock(&server_started_mtx);
-
-	if (CHECK_FAIL(err < 0)) {
-		perror("Failed to listed on socket");
-		return ERR_PTR(err);
-	}
-
-	while (true) {
-		client_fd = accept(fd, (struct sockaddr *)&addr, &len);
-		if (client_fd == -1 && errno == EAGAIN) {
-			usleep(50);
-			continue;
-		}
-		break;
-	}
-	if (CHECK_FAIL(client_fd < 0)) {
-		perror("Failed to accept client");
-		return ERR_PTR(err);
-	}
-
-	while (!server_done)
-		usleep(50);
-
-	close(client_fd);
-
-	return NULL;
-}
-
 void test_tcp_rtt(void)
 {
 	int server_fd, cgroup_fd;
-	pthread_t tid;
-	void *server_res;
 
 	cgroup_fd = test__join_cgroup("/tcp_rtt");
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server();
+	server_fd = start_server(AF_INET, SOCK_STREAM);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
-	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
-				      (void *)&server_fd)))
-		goto close_server_fd;
-
-	pthread_mutex_lock(&server_started_mtx);
-	pthread_cond_wait(&server_started, &server_started_mtx);
-	pthread_mutex_unlock(&server_started_mtx);
-
 	CHECK_FAIL(run_test(cgroup_fd, server_fd));
 
-	server_done = true;
-	CHECK_FAIL(pthread_join(tid, &server_res));
-	CHECK_FAIL(IS_ERR(server_res));
-
-close_server_fd:
 	close(server_fd);
+
 close_cgroup_fd:
 	close(cgroup_fd);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 465b371a561d..2702df2b2343 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -61,9 +61,10 @@ void test_test_overhead(void)
 	const char *raw_tp_name = "raw_tp/task_rename";
 	const char *fentry_name = "fentry/__set_task_comm";
 	const char *fexit_name = "fexit/__set_task_comm";
+	const char *fmodret_name = "fmod_ret/__set_task_comm";
 	const char *kprobe_func = "__set_task_comm";
 	struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
-	struct bpf_program *fentry_prog, *fexit_prog;
+	struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
 	struct bpf_object *obj;
 	struct bpf_link *link;
 	int err, duration = 0;
@@ -96,6 +97,10 @@ void test_test_overhead(void)
 	if (CHECK(!fexit_prog, "find_probe",
 		  "prog '%s' not found\n", fexit_name))
 		goto cleanup;
+	fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
+	if (CHECK(!fmodret_prog, "find_probe",
+		  "prog '%s' not found\n", fmodret_name))
+		goto cleanup;
 
 	err = bpf_object__load(obj);
 	if (CHECK(err, "obj_load", "err %d\n", err))
@@ -142,6 +147,13 @@ void test_test_overhead(void)
 		goto cleanup;
 	test_run("fexit");
 	bpf_link__destroy(link);
+
+	/* attach fmod_ret */
+	link = bpf_program__attach_trace(fmodret_prog);
+	if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("fmod_ret");
+	bpf_link__destroy(link);
 cleanup:
 	prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index dcb5ecac778e..48921ff74850 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_xdp(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3744196d7cba..d5c98f2cb12f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -1,13 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
-void test_xdp_adjust_tail(void)
+void test_xdp_adjust_tail_shrink(void)
 {
-	const char *file = "./test_adjust_tail.o";
+	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	__u32 duration, retval, size, expect_sz;
 	struct bpf_object *obj;
-	char buf[128];
-	__u32 duration, retval, size;
 	int err, prog_fd;
+	char buf[128];
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
@@ -20,10 +21,121 @@ void test_xdp_adjust_tail(void)
 	      "ipv4", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 
+	expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || retval != XDP_TX || size != 54,
-	      "ipv6", "err %d errno %d retval %d size %d\n",
+	CHECK(err || retval != XDP_TX || size != expect_sz,
+	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+	      err, errno, retval, size, expect_sz);
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	struct bpf_object *obj;
+	char buf[4096]; /* avoid segfault: large buf to hold grow results */
+	__u32 duration, retval, size, expect_sz;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
+
+	expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != XDP_TX || size != expect_sz,
+	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+	      err, errno, retval, size, expect_sz);
+
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow2(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	char buf[4096]; /* avoid segfault: large buf to hold grow results */
+	int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
+	struct bpf_object *obj;
+	int err, cnt, i;
+	int max_grow;
+
+	struct bpf_prog_test_run_attr tattr = {
+		.repeat		= 1,
+		.data_in	= &buf,
+		.data_out	= &buf,
+		.data_size_in	= 0, /* Per test */
+		.data_size_out	= 0, /* Per test */
+	};
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+
+	/* Test case-64 */
+	memset(buf, 1, sizeof(buf));
+	tattr.data_size_in  =  64; /* Determine test case via pkt size */
+	tattr.data_size_out = 128; /* Limit copy_size */
+	/* Kernel side alloc packet memory area that is zero init */
+	err = bpf_prog_test_run_xattr(&tattr);
+
+	CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
+		   || tattr.retval != XDP_TX
+		   || tattr.data_size_out != 192, /* Expected grow size */
+		   "case-64",
+		   "err %d errno %d retval %d size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out);
+
+	/* Extra checks for data contents */
+	CHECK_ATTR(tattr.data_size_out != 192
+		   || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
+		   || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
+		   || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
+		   "case-64-data",
+		   "err %d errno %d retval %d size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out);
+
+	/* Test case-128 */
+	memset(buf, 2, sizeof(buf));
+	tattr.data_size_in  = 128; /* Determine test case via pkt size */
+	tattr.data_size_out = sizeof(buf);   /* Copy everything */
+	err = bpf_prog_test_run_xattr(&tattr);
+
+	max_grow = 4096 - XDP_PACKET_HEADROOM -	tailroom; /* 3520 */
+	CHECK_ATTR(err
+		   || tattr.retval != XDP_TX
+		   || tattr.data_size_out != max_grow,/* Expect max grow size */
+		   "case-128",
+		   "err %d errno %d retval %d size %d expect-size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out, max_grow);
+
+	/* Extra checks for data content: Count grow size, will contain zeros */
+	for (i = 0, cnt = 0; i < sizeof(buf); i++) {
+		if (buf[i] == 0)
+			cnt++;
+	}
+	CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
+		   || tattr.data_size_out != max_grow, /* Total grow size */
+		   "case-128-data",
+		   "err %d errno %d retval %d size %d grow-size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out, cnt);
+
 	bpf_object__close(obj);
 }
+
+void test_xdp_adjust_tail(void)
+{
+	if (test__start_subtest("xdp_adjust_tail_shrink"))
+		test_xdp_adjust_tail_shrink();
+	if (test__start_subtest("xdp_adjust_tail_grow"))
+		test_xdp_adjust_tail_grow();
+	if (test__start_subtest("xdp_adjust_tail_grow2"))
+		test_xdp_adjust_tail_grow2();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index a0f688c37023..2c6c570b21f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 #include <net/if.h>
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..d19dbd668f6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+struct bpf_devmap_val {
+	u32 ifindex;   /* device index */
+	union {
+		int fd;  /* prog fd on map write */
+		u32 id;  /* prog id on map read */
+	} bpf_prog;
+};
+
+void test_xdp_with_devmap_helpers(void)
+{
+	struct test_xdp_with_devmap_helpers *skel;
+	struct bpf_prog_info info = {};
+	struct bpf_devmap_val val = {
+		.ifindex = IFINDEX_LO,
+	};
+	__u32 len = sizeof(info);
+	__u32 duration = 0, idx = 0;
+	int err, dm_fd, map_fd;
+
+
+	skel = test_xdp_with_devmap_helpers__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_xdp_with_devmap_helpers__open_and_load");
+		return;
+	}
+
+	/* can not attach program with DEVMAPs that allow programs
+	 * as xdp generic
+	 */
+	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+	      "should have failed\n");
+
+	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+	map_fd = bpf_map__fd(skel->maps.dm_ports);
+	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_close;
+
+	val.bpf_prog.fd = dm_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err, "Add program to devmap entry",
+	      "err %d errno %d\n", err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
+	CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
+	      "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+	/* can not attach BPF_XDP_DEVMAP program to a device */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+	      "should have failed\n");
+
+	val.ifindex = 1;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+	      "should have failed\n");
+
+out_close:
+	test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void test_neg_xdp_devmap_helpers(void)
+{
+	struct test_xdp_devmap_helpers *skel;
+	__u32 duration = 0;
+
+	skel = test_xdp_devmap_helpers__open_and_load();
+	if (CHECK(skel,
+		  "Load of XDP program accessing egress ifindex without attach type",
+		  "should have failed\n")) {
+		test_xdp_devmap_helpers__destroy(skel);
+	}
+}
+
+
+void test_xdp_devmap_attach(void)
+{
+	if (test__start_subtest("DEVMAP with programs in entries"))
+		test_xdp_with_devmap_helpers();
+
+	if (test__start_subtest("Verifier check of DEVMAP programs"))
+		test_neg_xdp_devmap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index c9404e6b226e..f284f72158ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_xdp_noinline(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 9941f0ba471e..de6de9221518 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -20,20 +20,20 @@
 #include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
+#define PROG(F) PROG_(F, _##F)
+#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
 
 /* These are the identifiers of the BPF programs that will be used in tail
  * calls. Name is limited to 16 characters, with the terminating character and
  * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
  */
-enum {
-	IP,
-	IPV6,
-	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
-	IPV6FR,	/* Fragmentation IPv6 Extension Header */
-	MPLS,
-	VLAN,
-};
+#define IP		0
+#define IPV6		1
+#define IPV6OP		2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
+#define IPV6FR		3 /* Fragmentation IPv6 Extension Header */
+#define MPLS		4
+#define VLAN		5
+#define MAX_PROG	6
 
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
@@ -59,7 +59,7 @@ struct frag_hdr {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
-	__uint(max_entries, 8);
+	__uint(max_entries, MAX_PROG);
 	__uint(key_size, sizeof(__u32));
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
new file mode 100644
index 000000000000..b57bd6fef208
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	__u64 seq_num = ctx->meta->seq_num;
+	struct bpf_map *map = ctx->map;
+
+	if (map == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "      %%%%%% END %%%%%%\n");
+		return 0;
+	}
+
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "      id   refcnt  usercnt  locked_vm\n");
+
+	BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
+		       map->usercnt.counter,
+		       map->memory.user->locked_vm.counter);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
new file mode 100644
index 000000000000..c8e9ca74c87b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__ipv6_route
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+	struct bpf_iter_meta *meta;
+	struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
+
+#define RTF_GATEWAY		0x0002
+#define IFNAMSIZ		16
+#define fib_nh_gw_family	nh_common.nhc_gw_family
+#define fib_nh_gw6		nh_common.nhc_gw.ipv6
+#define fib_nh_dev		nh_common.nhc_dev
+
+SEC("iter/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct fib6_info *rt = ctx->rt;
+	const struct net_device *dev;
+	struct fib6_nh *fib6_nh;
+	unsigned int flags;
+	struct nexthop *nh;
+
+	if (rt == (void *)0)
+		return 0;
+
+	fib6_nh = &rt->fib6_nh[0];
+	flags = rt->fib6_flags;
+
+	/* FIXME: nexthop_is_multipath is not handled here. */
+	nh = rt->nh;
+	if (rt->nh)
+		fib6_nh = &nh->nh_info->fib6_nh;
+
+	BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+
+	if (CONFIG_IPV6_SUBTREES)
+		BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr,
+			       rt->fib6_src.plen);
+	else
+		BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 ");
+
+	if (fib6_nh->fib_nh_gw_family) {
+		flags |= RTF_GATEWAY;
+		BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+	} else {
+		BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+	}
+
+	dev = fib6_nh->fib_nh_dev;
+	if (dev)
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags, dev->name);
+	else
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
new file mode 100644
index 000000000000..e7b8753eac0b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__netlink
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define sk_rmem_alloc	sk_backlog.rmem_alloc
+#define sk_refcnt	__sk_common.skc_refcnt
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+	struct bpf_iter_meta *meta;
+	struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
+SEC("iter/netlink")
+int dump_netlink(struct bpf_iter__netlink *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct netlink_sock *nlk = ctx->sk;
+	unsigned long group, ino;
+	struct inode *inode;
+	struct socket *sk;
+	struct sock *s;
+
+	if (nlk == (void *)0)
+		return 0;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "sk               Eth Pid        Groups   "
+				    "Rmem     Wmem     Dump  Locks    Drops    "
+				    "Inode\n");
+
+	s = &nlk->sk;
+	BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+
+	if (!nlk->groups)  {
+		group = 0;
+	} else {
+		/* FIXME: temporary use bpf_probe_read here, needs
+		 * verifier support to do direct access.
+		 */
+		bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+	}
+	BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
+		       nlk->portid, (u32)group,
+		       s->sk_rmem_alloc.counter,
+		       s->sk_wmem_alloc.refs.counter - 1,
+		       nlk->cb_running, s->sk_refcnt.refs.counter);
+
+	sk = s->sk_socket;
+	if (!sk) {
+		ino = 0;
+	} else {
+		/* FIXME: container_of inside SOCK_INODE has a forced
+		 * type conversion, and direct access cannot be used
+		 * with current verifier.
+		 */
+		inode = SOCK_INODE(sk);
+		bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	}
+	BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
new file mode 100644
index 000000000000..ee754021f98e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "    === END ===\n");
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
new file mode 100644
index 000000000000..0f0ec3db20ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task_file
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+	__u32 fd;
+	struct file *file;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task_file")
+int dump_task_file(struct bpf_iter__task_file *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	__u32 fd = ctx->fd;
+	struct file *file = ctx->file;
+
+	if (task == (void *)0 || file == (void *)0)
+		return 0;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid       fd      file\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+		       (long)file->f_op);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
new file mode 100644
index 000000000000..c71a7c283108
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'a'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
new file mode 100644
index 000000000000..8bdc8dc07444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'A'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
new file mode 100644
index 000000000000..13c2c90c835f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	int tgid;
+
+	tgid = task->tgid;
+	bpf_seq_write(seq, &tgid, sizeof(tgid));
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
new file mode 100644
index 000000000000..0aa71b333cf3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+__u32 map1_id = 0, map2_id = 0;
+__u32 map1_accessed = 0, map2_accessed = 0;
+__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
+
+static volatile const __u32 print_len;
+static volatile const __u32 ret1;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct bpf_map *map = ctx->map;
+	__u64 seq_num;
+	int i, ret = 0;
+
+	if (map == (void *)0)
+		return 0;
+
+	/* only dump map1_id and map2_id */
+	if (map->id != map1_id && map->id != map2_id)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (map->id == map1_id) {
+		map1_seqnum = seq_num;
+		map1_accessed++;
+	}
+
+	if (map->id == map2_id) {
+		if (map2_accessed == 0) {
+			map2_seqnum1 = seq_num;
+			if (ret1)
+				ret = 1;
+		} else {
+			map2_seqnum2 = seq_num;
+		}
+		map2_accessed++;
+	}
+
+	/* fill seq_file buffer */
+	for (i = 0; i < print_len; i++)
+		bpf_seq_write(seq, &seq_num, sizeof(seq_num));
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
new file mode 100644
index 000000000000..dee1339e6905
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+int count = 0;
+
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	char c;
+
+	if (count < 4) {
+		c = START_CHAR + count;
+		bpf_seq_write(seq, &c, sizeof(c));
+		count++;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
new file mode 100644
index 000000000000..3f757e30d7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+__u16 g_serv_port = 0;
+
+static inline void set_ip(__u32 *dst, const struct in6_addr *src)
+{
+	dst[0] = src->in6_u.u6_addr32[0];
+	dst[1] = src->in6_u.u6_addr32[1];
+	dst[2] = src->in6_u.u6_addr32[2];
+	dst[3] = src->in6_u.u6_addr32[3];
+}
+
+static inline void set_tuple(struct bpf_sock_tuple *tuple,
+			     const struct ipv6hdr *ip6h,
+			     const struct tcphdr *tcph)
+{
+	set_ip(tuple->ipv6.saddr, &ip6h->daddr);
+	set_ip(tuple->ipv6.daddr, &ip6h->saddr);
+	tuple->ipv6.sport = tcph->dest;
+	tuple->ipv6.dport = tcph->source;
+}
+
+static inline int is_allowed_peer_cg(struct __sk_buff *skb,
+				     const struct ipv6hdr *ip6h,
+				     const struct tcphdr *tcph)
+{
+	__u64 cgid, acgid, peer_cgid, peer_acgid;
+	struct bpf_sock_tuple tuple;
+	size_t tuple_len = sizeof(tuple.ipv6);
+	struct bpf_sock *peer_sk;
+
+	set_tuple(&tuple, ip6h, tcph);
+
+	peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
+				    BPF_F_CURRENT_NETNS, 0);
+	if (!peer_sk)
+		return 0;
+
+	cgid = bpf_skb_cgroup_id(skb);
+	peer_cgid = bpf_sk_cgroup_id(peer_sk);
+
+	acgid = bpf_skb_ancestor_cgroup_id(skb, 2);
+	peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2);
+
+	bpf_sk_release(peer_sk);
+
+	return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_lookup(struct __sk_buff *skb)
+{
+	__u32 serv_port_key = 0;
+	struct ipv6hdr ip6h;
+	struct tcphdr tcph;
+
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return 1;
+
+	/* For SYN packets coming to listening socket skb->remote_port will be
+	 * zero, so IPv6/TCP headers are loaded to identify remote peer
+	 * instead.
+	 */
+	if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+		return 1;
+
+	if (ip6h.nexthdr != IPPROTO_TCP)
+		return 1;
+
+	if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph)))
+		return 1;
+
+	if (!g_serv_port)
+		return 0;
+
+	if (tcph.dest != g_serv_port)
+		return 1;
+
+	return is_allowed_peer_cg(skb, &ip6h, &tcph);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index ad3c498a8150..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -8,6 +8,9 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
@@ -16,6 +19,14 @@
 #define DST_REWRITE_IP4		0x7f000001U
 #define DST_REWRITE_PORT4	4444
 
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
 int _version SEC("version") = 1;
 
 __attribute__ ((noinline))
@@ -33,6 +44,66 @@ int do_bind(struct bpf_sock_addr *ctx)
 	return 1;
 }
 
+static __inline int verify_cc(struct bpf_sock_addr *ctx,
+			      char expected[TCP_CA_NAME_MAX])
+{
+	char buf[TCP_CA_NAME_MAX];
+	int i;
+
+	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+		return 1;
+
+	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+		if (buf[i] != expected[i])
+			return 1;
+		if (buf[i] == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static __inline int set_cc(struct bpf_sock_addr *ctx)
+{
+	char reno[TCP_CA_NAME_MAX] = "reno";
+	char cubic[TCP_CA_NAME_MAX] = "cubic";
+
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+		return 1;
+	if (verify_cc(ctx, reno))
+		return 1;
+
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+		return 1;
+	if (verify_cc(ctx, cubic))
+		return 1;
+
+	return 0;
+}
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -46,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
 	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
@@ -66,6 +141,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 
 	bpf_sk_release(sk);
 
+	/* Rewrite congestion control. */
+	if (ctx->type == SOCK_STREAM && set_cc(ctx))
+		return 0;
+
 	/* Rewrite destination. */
 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
new file mode 100644
index 000000000000..7396308677a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+	__be32 addr;
+	__be16 port;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect4")
+int connect4(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in sa = {};
+	struct svc_addr *orig;
+
+	/* Force local address to 127.0.0.1:22222. */
+	sa.sin_family = AF_INET;
+	sa.sin_port = bpf_htons(22222);
+	sa.sin_addr.s_addr = bpf_htonl(0x7f000001);
+
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
+
+	/* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */
+	if (ctx->user_port == bpf_htons(60000)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+		if (!orig)
+			return 0;
+
+		orig->addr = ctx->user_ip4;
+		orig->port = ctx->user_port;
+
+		ctx->user_ip4 = bpf_htonl(0x7f000001);
+		ctx->user_port = bpf_htons(60123);
+	}
+	return 1;
+}
+
+SEC("cgroup/getsockname4")
+int getsockname4(struct bpf_sock_addr *ctx)
+{
+	/* Expose local server as 1.2.3.4:60000 to client. */
+	if (ctx->user_port == bpf_htons(60123)) {
+		ctx->user_ip4 = bpf_htonl(0x01020304);
+		ctx->user_port = bpf_htons(60000);
+	}
+	return 1;
+}
+
+SEC("cgroup/getpeername4")
+int getpeername4(struct bpf_sock_addr *ctx)
+{
+	struct svc_addr *orig;
+
+	/* Expose service 1.2.3.4:60000 as peer instead of backend. */
+	if (ctx->user_port == bpf_htons(60123)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+		if (orig) {
+			ctx->user_ip4 = orig->addr;
+			ctx->user_port = orig->port;
+		}
+	}
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
new file mode 100644
index 000000000000..c1a2b555e9ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+	__be32 addr[4];
+	__be16 port;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect6")
+int connect6(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in6 sa = {};
+	struct svc_addr *orig;
+
+	/* Force local address to [::1]:22223. */
+	sa.sin6_family = AF_INET6;
+	sa.sin6_port = bpf_htons(22223);
+	sa.sin6_addr.s6_addr32[3] = bpf_htonl(1);
+
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
+
+	/* Rewire service [fc00::1]:60000 to backend [::1]:60124. */
+	if (ctx->user_port == bpf_htons(60000)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+		if (!orig)
+			return 0;
+
+		orig->addr[0] = ctx->user_ip6[0];
+		orig->addr[1] = ctx->user_ip6[1];
+		orig->addr[2] = ctx->user_ip6[2];
+		orig->addr[3] = ctx->user_ip6[3];
+		orig->port = ctx->user_port;
+
+		ctx->user_ip6[0] = 0;
+		ctx->user_ip6[1] = 0;
+		ctx->user_ip6[2] = 0;
+		ctx->user_ip6[3] = bpf_htonl(1);
+		ctx->user_port = bpf_htons(60124);
+	}
+	return 1;
+}
+
+SEC("cgroup/getsockname6")
+int getsockname6(struct bpf_sock_addr *ctx)
+{
+	/* Expose local server as [fc00::1]:60000 to client. */
+	if (ctx->user_port == bpf_htons(60124)) {
+		ctx->user_ip6[0] = bpf_htonl(0xfc000000);
+		ctx->user_ip6[1] = 0;
+		ctx->user_ip6[2] = 0;
+		ctx->user_ip6[3] = bpf_htonl(1);
+		ctx->user_port = bpf_htons(60000);
+	}
+	return 1;
+}
+
+SEC("cgroup/getpeername6")
+int getpeername6(struct bpf_sock_addr *ctx)
+{
+	struct svc_addr *orig;
+
+	/* Expose service [fc00::1]:60000 as peer instead of backend. */
+	if (ctx->user_port == bpf_htons(60124)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+		if (orig) {
+			ctx->user_ip6[0] = orig->addr[0];
+			ctx->user_ip6[1] = orig->addr[1];
+			ctx->user_ip6[2] = orig->addr[2];
+			ctx->user_ip6[3] = orig->addr[3];
+			ctx->user_port = orig->port;
+		}
+	}
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 6d598cfbdb3e..34d84717c946 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -379,7 +379,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr {
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
 	/* equivalent to flexible array */
-	struct core_reloc_arrays_substruct f[0][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 struct core_reloc_arrays___fixed_arr {
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
new file mode 100644
index 000000000000..e5ab4836a641
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(value_size, sizeof(int));
+	__uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+	__u64 *sample;
+	int i;
+
+	for (i = 0; i < batch_cnt; i++) {
+		if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+					  &sample_val, sizeof(sample_val)))
+			__sync_add_and_fetch(&dropped, 1);
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
new file mode 100644
index 000000000000..123607d314d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
+static __always_inline long get_flags()
+{
+	long sz;
+
+	if (!wakeup_data_size)
+		return 0;
+
+	sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+	long *sample, flags;
+	int i;
+
+	if (!use_output) {
+		for (i = 0; i < batch_cnt; i++) {
+			sample = bpf_ringbuf_reserve(&ringbuf,
+					             sizeof(sample_val), 0);
+			if (!sample) {
+				__sync_add_and_fetch(&dropped, 1);
+			} else {
+				*sample = sample_val;
+				flags = get_flags();
+				bpf_ringbuf_submit(sample, flags);
+			}
+		}
+	} else {
+		for (i = 0; i < batch_cnt; i++) {
+			flags = get_flags();
+			if (bpf_ringbuf_output(&ringbuf, &sample_val,
+					       sizeof(sample_val), flags))
+				__sync_add_and_fetch(&dropped, 1);
+		}
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
new file mode 100644
index 000000000000..e5093796be97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} inner_map1 SEC(".maps"),
+  inner_map2 SEC(".maps");
+
+struct outer_arr {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 3);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	/* it's possible to use anonymous struct as inner map definition here */
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		/* changing max_entries to 2 will fail during load
+		 * due to incompatibility with inner_map definition */
+		__uint(max_entries, 1);
+		__type(key, int);
+		__type(value, int);
+	});
+} outer_arr SEC(".maps") = {
+	/* (void *) cast is necessary because we didn't use `struct inner_map`
+	 * in __inner(values, ...)
+	 * Actually, a conscious effort is required to screw up initialization
+	 * of inner map slots, which is a great thing!
+	 */
+	.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
+};
+
+struct outer_hash {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 5);
+	__uint(key_size, sizeof(int));
+	/* Here everything works flawlessly due to reuse of struct inner_map
+	 * and compiler will complain at the attempt to use non-inner_map
+	 * references below. This is great experience.
+	 */
+	__array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+	.values = {
+		[0] = &inner_map2,
+		[4] = &inner_map1,
+	},
+};
+
+int input = 0;
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+	struct inner_map *inner_map;
+	int key = 0, val;
+
+	inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+	if (!inner_map)
+		return 1;
+	val = input;
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
+	inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+	if (!inner_map)
+		return 1;
+	val = input + 1;
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
new file mode 100644
index 000000000000..f0b72e86bee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -0,0 +1,1061 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+static volatile const __be16 ENCAPSULATION_PORT;
+static volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+	uint64_t processed_packets_total;
+	uint64_t l3_protocol_packets_total_ipv4;
+	uint64_t l3_protocol_packets_total_ipv6;
+	uint64_t l4_protocol_packets_total_tcp;
+	uint64_t l4_protocol_packets_total_udp;
+	uint64_t accepted_packets_total_syn;
+	uint64_t accepted_packets_total_syn_cookies;
+	uint64_t accepted_packets_total_last_hop;
+	uint64_t accepted_packets_total_icmp_echo_request;
+	uint64_t accepted_packets_total_established;
+	uint64_t forwarded_packets_total_gue;
+	uint64_t forwarded_packets_total_gre;
+
+	uint64_t errors_total_unknown_l3_proto;
+	uint64_t errors_total_unknown_l4_proto;
+	uint64_t errors_total_malformed_ip;
+	uint64_t errors_total_fragmented_ip;
+	uint64_t errors_total_malformed_icmp;
+	uint64_t errors_total_unwanted_icmp;
+	uint64_t errors_total_malformed_icmp_pkt_too_big;
+	uint64_t errors_total_malformed_tcp;
+	uint64_t errors_total_malformed_udp;
+	uint64_t errors_total_icmp_echo_replies;
+	uint64_t errors_total_malformed_encapsulation;
+	uint64_t errors_total_encap_adjust_failed;
+	uint64_t errors_total_encap_buffer_too_small;
+	uint64_t errors_total_redirect_loop;
+} metrics_t;
+
+typedef enum {
+	INVALID = 0,
+	UNKNOWN,
+	ECHO_REQUEST,
+	SYN,
+	SYN_COOKIE,
+	ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+	uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+	sizeof(flow_ports_t) !=
+		offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+			offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+	"flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+	sizeof(flow_ports_t) !=
+		offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+			offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+	"flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x)                           \
+	do {                                      \
+		ret_t __ret = x;                  \
+		if (__ret != CONTINUE_PROCESSING) \
+			return __ret;             \
+	} while (0)
+
+/* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes),
+ * or not aligned if the arch supports efficient unaligned access.
+ *
+ * Since the verifier ensures that eBPF packet accesses follow these rules,
+ * we can tell LLVM to emit code as if we always had a larger alignment.
+ * It will yell at us if we end up on a platform where this is not valid.
+ */
+typedef uint8_t *net_ptr __attribute__((align_value(8)));
+
+typedef struct buf {
+	struct __sk_buff *skb;
+	net_ptr head;
+	/* NB: tail musn't have alignment other than 1, otherwise
+	* LLVM will go and eliminate code, e.g. when checking packet lengths.
+	*/
+	uint8_t *const tail;
+} buf_t;
+
+static size_t buf_off(const buf_t *buf)
+{
+	/* Clang seems to optimize constructs like
+	 *    a - b + c
+	 * if c is known:
+	 *    r? = c
+	 *    r? -= b
+	 *    r? += a
+	 *
+	 * This is a problem if a and b are packet pointers,
+	 * since the verifier allows subtracting two pointers to
+	 * get a scalar, but not a scalar and a pointer.
+	 *
+	 * Use inline asm to break this optimization.
+	 */
+	size_t off = (size_t)buf->head;
+	asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data));
+	return off;
+}
+
+static bool buf_copy(buf_t *buf, void *dst, size_t len)
+{
+	if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
+		return false;
+	}
+
+	buf->head += len;
+	return true;
+}
+
+static bool buf_skip(buf_t *buf, const size_t len)
+{
+	/* Check whether off + len is valid in the non-linear part. */
+	if (buf_off(buf) + len > buf->skb->len) {
+		return false;
+	}
+
+	buf->head += len;
+	return true;
+}
+
+/* Returns a pointer to the start of buf, or NULL if len is
+ * larger than the remaining data. Consumes len bytes on a successful
+ * call.
+ *
+ * If scratch is not NULL, the function will attempt to load non-linear
+ * data via bpf_skb_load_bytes. On success, scratch is returned.
+ */
+static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+{
+	if (buf->head + len > buf->tail) {
+		if (scratch == NULL) {
+			return NULL;
+		}
+
+		return buf_copy(buf, scratch, len) ? scratch : NULL;
+	}
+
+	void *ptr = buf->head;
+	buf->head += len;
+	return ptr;
+}
+
+static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+{
+	if (ipv4->ihl <= 5) {
+		return true;
+	}
+
+	return buf_skip(buf, (ipv4->ihl - 5) * 4);
+}
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+	uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+	return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+{
+	struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
+	if (ipv4 == NULL) {
+		return NULL;
+	}
+
+	if (ipv4->ihl < 5) {
+		return NULL;
+	}
+
+	if (!pkt_skip_ipv4_options(pkt, ipv4)) {
+		return NULL;
+	}
+
+	return ipv4;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+{
+	if (!buf_copy(pkt, ports, sizeof(*ports))) {
+		return false;
+	}
+
+	/* Ports in the L4 headers are reversed, since we are parsing an ICMP
+	 * payload which is going towards the eyeball.
+	 */
+	uint16_t dst = ports->src;
+	ports->src = ports->dst;
+	ports->dst = dst;
+	return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+	/* The highest reasonable value for an IPv4 header
+	 * checksum requires two folds, so we just do that always.
+	 */
+	csum = (csum & 0xffff) + (csum >> 16);
+	csum = (csum & 0xffff) + (csum >> 16);
+	return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+	iph->check = 0;
+
+	/* An IP header without options is 20 bytes. Two of those
+	 * are the checksum, which we always set to zero. Hence,
+	 * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+	 * which fits in 32 bit.
+	 */
+	_Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+	uint32_t acc = 0;
+	uint16_t *ipw = (uint16_t *)iph;
+
+#pragma clang loop unroll(full)
+	for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
+		acc += ipw[i];
+	}
+
+	iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+					    const struct ipv6hdr *ipv6,
+					    uint8_t *upper_proto,
+					    bool *is_fragment)
+{
+	/* We understand five extension headers.
+	 * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+	 * headers should occur once, except Destination Options, which may
+	 * occur twice. Hence we give up after 6 headers.
+	 */
+	struct {
+		uint8_t next;
+		uint8_t len;
+	} exthdr = {
+		.next = ipv6->nexthdr,
+	};
+	*is_fragment = false;
+
+#pragma clang loop unroll(full)
+	for (int i = 0; i < 6; i++) {
+		switch (exthdr.next) {
+		case IPPROTO_FRAGMENT:
+			*is_fragment = true;
+			/* NB: We don't check that hdrlen == 0 as per spec. */
+			/* fallthrough; */
+
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_MH:
+			if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) {
+				return false;
+			}
+
+			/* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+			if (!buf_skip(pkt,
+				      (exthdr.len + 1) * 8 - sizeof(exthdr))) {
+				return false;
+			}
+
+			/* Decode next header */
+			break;
+
+		default:
+			/* The next header is not one of the known extension
+			 * headers, treat it as the upper layer header.
+			 *
+			 * This handles IPPROTO_NONE.
+			 *
+			 * Encapsulating Security Payload (50) and Authentication
+			 * Header (51) also end up here (and will trigger an
+			 * unknown proto error later). They have a custom header
+			 * format and seem too esoteric to care about.
+			 */
+			*upper_proto = exthdr.next;
+			return true;
+		}
+	}
+
+	/* We never found an upper layer header. */
+	return false;
+}
+
+/* This function has to be inlined, because the verifier otherwise rejects it
+ * due to returning a pointer to the stack. This is technically correct, since
+ * scratch is allocated on the stack. However, this usage should be safe since
+ * it's the callers stack after all.
+ */
+static inline __attribute__((__always_inline__)) struct ipv6hdr *
+pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
+	       bool *is_fragment)
+{
+	struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch);
+	if (ipv6 == NULL) {
+		return NULL;
+	}
+
+	if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) {
+		return NULL;
+	}
+
+	return ipv6;
+}
+
+/* Global metrics, per CPU
+ */
+struct bpf_map_def metrics_map SEC("maps") = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(unsigned int),
+	.value_size = sizeof(metrics_t),
+	.max_entries = 1,
+};
+
+static metrics_t *get_global_metrics(void)
+{
+	uint64_t key = 0;
+	return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+	const int payload_off =
+		sizeof(*encap) +
+		sizeof(struct in_addr) * encap->unigue.hop_count;
+	int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+	// Changing the ethertype if the encapsulated packet is ipv6
+	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+		encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+	}
+
+	if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+				BPF_F_ADJ_ROOM_FIXED_GSO |
+				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+	    bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+		return TC_ACT_SHOT;
+
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+			      struct in_addr *next_hop, metrics_t *metrics)
+{
+	metrics->forwarded_packets_total_gre++;
+
+	const int payload_off =
+		sizeof(*encap) +
+		sizeof(struct in_addr) * encap->unigue.hop_count;
+	int32_t encap_overhead =
+		payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+	int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+	uint16_t proto = ETH_P_IP;
+
+	/* Loop protection: the inner packet's TTL is decremented as a safeguard
+	 * against any forwarding loop. As the only interesting field is the TTL
+	 * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+	 * as they handle the split packets if needed (no need for the data to be
+	 * in the linear section).
+	 */
+	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+		proto = ETH_P_IPV6;
+		uint8_t ttl;
+		int rc;
+
+		rc = bpf_skb_load_bytes(
+			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+			&ttl, 1);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		if (ttl == 0) {
+			metrics->errors_total_redirect_loop++;
+			return TC_ACT_SHOT;
+		}
+
+		ttl--;
+		rc = bpf_skb_store_bytes(
+			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+			&ttl, 1, 0);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+	} else {
+		uint8_t ttl;
+		int rc;
+
+		rc = bpf_skb_load_bytes(
+			skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+			1);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		if (ttl == 0) {
+			metrics->errors_total_redirect_loop++;
+			return TC_ACT_SHOT;
+		}
+
+		/* IPv4 also has a checksum to patch. While the TTL is only one byte,
+		 * this function only works for 2 and 4 bytes arguments (the result is
+		 * the same).
+		 */
+		rc = bpf_l3_csum_replace(
+			skb, payload_off + offsetof(struct iphdr, check), ttl,
+			ttl - 1, 2);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		ttl--;
+		rc = bpf_skb_store_bytes(
+			skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+			0);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+	}
+
+	if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+				BPF_F_ADJ_ROOM_FIXED_GSO |
+				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+	    bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+		metrics->errors_total_encap_adjust_failed++;
+		return TC_ACT_SHOT;
+	}
+
+	if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+		metrics->errors_total_encap_buffer_too_small++;
+		return TC_ACT_SHOT;
+	}
+
+	buf_t pkt = {
+		.skb = skb,
+		.head = (uint8_t *)(long)skb->data,
+		.tail = (uint8_t *)(long)skb->data_end,
+	};
+
+	encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL);
+	if (encap_gre == NULL) {
+		metrics->errors_total_encap_buffer_too_small++;
+		return TC_ACT_SHOT;
+	}
+
+	encap_gre->ip.protocol = IPPROTO_GRE;
+	encap_gre->ip.daddr = next_hop->s_addr;
+	encap_gre->ip.saddr = ENCAPSULATION_IP;
+	encap_gre->ip.tot_len =
+		bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+	encap_gre->gre.flags = 0;
+	encap_gre->gre.protocol = bpf_htons(proto);
+	pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+	return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+				 struct in_addr *next_hop, metrics_t *metrics)
+{
+	/* swap L2 addresses */
+	/* This assumes that packets are received from a router.
+	 * So just swapping the MAC addresses here will make the packet go back to
+	 * the router, which will send it to the appropriate machine.
+	 */
+	unsigned char temp[ETH_ALEN];
+	memcpy(temp, encap->eth.h_dest, sizeof(temp));
+	memcpy(encap->eth.h_dest, encap->eth.h_source,
+	       sizeof(encap->eth.h_dest));
+	memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+	if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+	    encap->unigue.last_hop_gre) {
+		return forward_with_gre(skb, encap, next_hop, metrics);
+	}
+
+	metrics->forwarded_packets_total_gue++;
+	uint32_t old_saddr = encap->ip.saddr;
+	encap->ip.saddr = encap->ip.daddr;
+	encap->ip.daddr = next_hop->s_addr;
+	if (encap->unigue.next_hop < encap->unigue.hop_count) {
+		encap->unigue.next_hop++;
+	}
+
+	/* Remove ip->saddr, add next_hop->s_addr */
+	const uint64_t off = offsetof(typeof(*encap), ip.check);
+	int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+	if (ret < 0) {
+		return TC_ACT_SHOT;
+	}
+
+	return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(buf_t *pkt, int n)
+{
+	switch (n) {
+	case 1:
+		if (!buf_skip(pkt, sizeof(struct in_addr)))
+			return TC_ACT_SHOT;
+	case 0:
+		return CONTINUE_PROCESSING;
+
+	default:
+		return TC_ACT_SHOT;
+	}
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+			  struct in_addr *next_hop)
+{
+	if (encap->unigue.next_hop > encap->unigue.hop_count) {
+		return TC_ACT_SHOT;
+	}
+
+	/* Skip "used" next hops. */
+	MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop));
+
+	if (encap->unigue.next_hop == encap->unigue.hop_count) {
+		/* No more next hops, we are at the end of the GLB header. */
+		next_hop->s_addr = 0;
+		return CONTINUE_PROCESSING;
+	}
+
+	if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) {
+		return TC_ACT_SHOT;
+	}
+
+	/* Skip the remainig next hops (may be zero). */
+	return skip_next_hops(pkt, encap->unigue.hop_count -
+					   encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that let's us work around verifier limitations:
+ *
+ *    fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitue a costant for sizeof, which allows the verifier
+ * to track it's value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+			   uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+	switch (iphlen) {
+	case sizeof(struct iphdr): {
+		struct iphdr *ipv4 = (struct iphdr *)iph;
+		tuple->ipv4.daddr = ipv4->daddr;
+		tuple->ipv4.saddr = ipv4->saddr;
+		tuple->ipv4.sport = sport;
+		tuple->ipv4.dport = dport;
+		return sizeof(tuple->ipv4);
+	}
+
+	case sizeof(struct ipv6hdr): {
+		struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+		memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+		       sizeof(tuple->ipv6.daddr));
+		memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+		       sizeof(tuple->ipv6.saddr));
+		tuple->ipv6.sport = sport;
+		tuple->ipv6.dport = dport;
+		return sizeof(tuple->ipv6);
+	}
+
+	default:
+		return 0;
+	}
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb,
+			      struct bpf_sock_tuple *tuple, uint64_t tuplen,
+			      void *iph, struct tcphdr *tcp)
+{
+	struct bpf_sock *sk =
+		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+	if (sk == NULL) {
+		return UNKNOWN;
+	}
+
+	if (sk->state != BPF_TCP_LISTEN) {
+		bpf_sk_release(sk);
+		return ESTABLISHED;
+	}
+
+	if (iph != NULL && tcp != NULL) {
+		/* Kludge: we've run out of arguments, but need the length of the ip header. */
+		uint64_t iphlen = sizeof(struct iphdr);
+		if (tuplen == sizeof(tuple->ipv6)) {
+			iphlen = sizeof(struct ipv6hdr);
+		}
+
+		if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+					    sizeof(*tcp)) == 0) {
+			bpf_sk_release(sk);
+			return SYN_COOKIE;
+		}
+	}
+
+	bpf_sk_release(sk);
+	return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb,
+			      struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+	struct bpf_sock *sk =
+		bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+	if (sk == NULL) {
+		return UNKNOWN;
+	}
+
+	if (sk->state == BPF_TCP_ESTABLISHED) {
+		bpf_sk_release(sk);
+		return ESTABLISHED;
+	}
+
+	bpf_sk_release(sk);
+	return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+			       struct bpf_sock_tuple *tuple, uint64_t tuplen,
+			       metrics_t *metrics)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+		return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+	case IPPROTO_UDP:
+		return classify_udp(skb, tuple, tuplen);
+
+	default:
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+}
+
+static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+{
+	struct icmphdr icmp;
+	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+
+	/* We should never receive encapsulated echo replies. */
+	if (icmp.type == ICMP_ECHOREPLY) {
+		metrics->errors_total_icmp_echo_replies++;
+		return INVALID;
+	}
+
+	if (icmp.type == ICMP_ECHO) {
+		return ECHO_REQUEST;
+	}
+
+	if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+		metrics->errors_total_unwanted_icmp++;
+		return INVALID;
+	}
+
+	struct iphdr _ip4;
+	const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+	if (ipv4 == NULL) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	/* The source address in the outer IP header is from the entity that
+	 * originated the ICMP message. Use the original IP header to restore
+	 * the correct flow tuple.
+	 */
+	struct bpf_sock_tuple tuple;
+	tuple.ipv4.saddr = ipv4->daddr;
+	tuple.ipv4.daddr = ipv4->saddr;
+
+	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	return classify_icmp(pkt->skb, ipv4->protocol, &tuple,
+			     sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+{
+	struct icmp6hdr icmp6;
+	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+
+	/* We should never receive encapsulated echo replies. */
+	if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+		metrics->errors_total_icmp_echo_replies++;
+		return INVALID;
+	}
+
+	if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+		return ECHO_REQUEST;
+	}
+
+	if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+		metrics->errors_total_unwanted_icmp++;
+		return INVALID;
+	}
+
+	bool is_fragment;
+	uint8_t l4_proto;
+	struct ipv6hdr _ipv6;
+	const struct ipv6hdr *ipv6 =
+		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+	if (ipv6 == NULL) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	if (is_fragment) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	/* Swap source and dest addresses. */
+	struct bpf_sock_tuple tuple;
+	memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr));
+	memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr));
+
+	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+			     metrics);
+}
+
+static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+			     metrics_t *metrics)
+{
+	metrics->l4_protocol_packets_total_tcp++;
+
+	struct tcphdr _tcp;
+	struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp);
+	if (tcp == NULL) {
+		metrics->errors_total_malformed_tcp++;
+		return INVALID;
+	}
+
+	if (tcp->syn) {
+		return SYN;
+	}
+
+	struct bpf_sock_tuple tuple;
+	uint64_t tuplen =
+		fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest);
+	return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
+}
+
+static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+			     metrics_t *metrics)
+{
+	metrics->l4_protocol_packets_total_udp++;
+
+	struct udphdr _udp;
+	struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp);
+	if (udph == NULL) {
+		metrics->errors_total_malformed_udp++;
+		return INVALID;
+	}
+
+	struct bpf_sock_tuple tuple;
+	uint64_t tuplen =
+		fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest);
+	return classify_udp(pkt->skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+{
+	metrics->l3_protocol_packets_total_ipv4++;
+
+	struct iphdr _ip4;
+	struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+	if (ipv4 == NULL) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv4->version != 4) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv4_is_fragment(ipv4)) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	switch (ipv4->protocol) {
+	case IPPROTO_ICMP:
+		return process_icmpv4(pkt, metrics);
+
+	case IPPROTO_TCP:
+		return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+	case IPPROTO_UDP:
+		return process_udp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+	default:
+		metrics->errors_total_unknown_l4_proto++;
+		return INVALID;
+	}
+}
+
+static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+{
+	metrics->l3_protocol_packets_total_ipv6++;
+
+	uint8_t l4_proto;
+	bool is_fragment;
+	struct ipv6hdr _ipv6;
+	struct ipv6hdr *ipv6 =
+		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+	if (ipv6 == NULL) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv6->version != 6) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (is_fragment) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	switch (l4_proto) {
+	case IPPROTO_ICMPV6:
+		return process_icmpv6(pkt, metrics);
+
+	case IPPROTO_TCP:
+		return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+	case IPPROTO_UDP:
+		return process_udp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+	default:
+		metrics->errors_total_unknown_l4_proto++;
+		return INVALID;
+	}
+}
+
+SEC("classifier/cls_redirect")
+int cls_redirect(struct __sk_buff *skb)
+{
+	metrics_t *metrics = get_global_metrics();
+	if (metrics == NULL) {
+		return TC_ACT_SHOT;
+	}
+
+	metrics->processed_packets_total++;
+
+	/* Pass bogus packets as long as we're not sure they're
+	 * destined for us.
+	 */
+	if (skb->protocol != bpf_htons(ETH_P_IP)) {
+		return TC_ACT_OK;
+	}
+
+	encap_headers_t *encap;
+
+	/* Make sure that all encapsulation headers are available in
+	 * the linear portion of the skb. This makes it easy to manipulate them.
+	 */
+	if (bpf_skb_pull_data(skb, sizeof(*encap))) {
+		return TC_ACT_OK;
+	}
+
+	buf_t pkt = {
+		.skb = skb,
+		.head = (uint8_t *)(long)skb->data,
+		.tail = (uint8_t *)(long)skb->data_end,
+	};
+
+	encap = buf_assign(&pkt, sizeof(*encap), NULL);
+	if (encap == NULL) {
+		return TC_ACT_OK;
+	}
+
+	if (encap->ip.ihl != 5) {
+		/* We never have any options. */
+		return TC_ACT_OK;
+	}
+
+	if (encap->ip.daddr != ENCAPSULATION_IP ||
+	    encap->ip.protocol != IPPROTO_UDP) {
+		return TC_ACT_OK;
+	}
+
+	/* TODO Check UDP length? */
+	if (encap->udp.dest != ENCAPSULATION_PORT) {
+		return TC_ACT_OK;
+	}
+
+	/* We now know that the packet is destined to us, we can
+	 * drop bogus ones.
+	 */
+	if (ipv4_is_fragment((void *)&encap->ip)) {
+		metrics->errors_total_fragmented_ip++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.variant != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.control != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.flags != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.hlen !=
+	    sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->unigue.version != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->unigue.reserved != 0) {
+		return TC_ACT_SHOT;
+	}
+
+	struct in_addr next_hop;
+	MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));
+
+	if (next_hop.s_addr == 0) {
+		metrics->accepted_packets_total_last_hop++;
+		return accept_locally(skb, encap);
+	}
+
+	verdict_t verdict;
+	switch (encap->gue.proto_ctype) {
+	case IPPROTO_IPIP:
+		verdict = process_ipv4(&pkt, metrics);
+		break;
+
+	case IPPROTO_IPV6:
+		verdict = process_ipv6(&pkt, metrics);
+		break;
+
+	default:
+		metrics->errors_total_unknown_l3_proto++;
+		return TC_ACT_SHOT;
+	}
+
+	switch (verdict) {
+	case INVALID:
+		/* metrics have already been bumped */
+		return TC_ACT_SHOT;
+
+	case UNKNOWN:
+		return forward_to_next_hop(skb, encap, &next_hop, metrics);
+
+	case ECHO_REQUEST:
+		metrics->accepted_packets_total_icmp_echo_request++;
+		break;
+
+	case SYN:
+		if (encap->unigue.forward_syn) {
+			return forward_to_next_hop(skb, encap, &next_hop,
+						   metrics);
+		}
+
+		metrics->accepted_packets_total_syn++;
+		break;
+
+	case SYN_COOKIE:
+		metrics->accepted_packets_total_syn_cookies++;
+		break;
+
+	case ESTABLISHED:
+		metrics->accepted_packets_total_established++;
+		break;
+	}
+
+	return accept_locally(skb, encap);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
new file mode 100644
index 000000000000..76eab0aacba0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright 2019, 2020 Cloudflare */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+
+struct gre_base_hdr {
+	uint16_t flags;
+	uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	uint8_t hlen : 5, control : 1, variant : 2;
+#else
+	uint8_t variant : 2, control : 1, hlen : 5;
+#endif
+	uint8_t proto_ctype;
+	uint16_t flags;
+};
+
+struct unigue {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	uint8_t _r : 2, last_hop_gre : 1, forward_syn : 1, version : 4;
+#else
+	uint8_t version : 4, forward_syn : 1, last_hop_gre : 1, _r : 2;
+#endif
+	uint8_t reserved;
+	uint8_t next_hop;
+	uint8_t hop_count;
+	// Next hops go here
+} __attribute__((packed));
+
+typedef struct {
+	struct ethhdr eth;
+	struct iphdr ip;
+	struct gre_base_hdr gre;
+} __attribute__((packed)) encap_gre_t;
+
+typedef struct {
+	struct ethhdr eth;
+	struct iphdr ip;
+	struct udphdr udp;
+	struct guehdr gue;
+	struct unigue unigue;
+} __attribute__((packed)) encap_headers_t;
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
new file mode 100644
index 000000000000..01a002ade529
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+	count += 1;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 6239596cd14e..4eb42cff5fe9 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,6 +9,14 @@ char _license[] SEC("license") = "GPL";
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 4096);
+	__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
+	__type(key, __u32);
+	__type(value, char);
+} rdonly_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 512 * 4); /* at least 4 pages of data */
 	__uint(map_flags, BPF_F_MMAPABLE);
 	__type(key, __u32);
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 98b9de2fafd0..ded71b3ff6b4 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -3,16 +3,8 @@
  */
 #include <stddef.h>
 #include <linux/bpf.h>
-#include <linux/pkt_cls.h>
 #include <bpf/bpf_helpers.h>
 
-/* It is a dumb bpf program such that it must have no
- * issue to be loaded since testing the verifier is
- * not the focus here.
- */
-
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
@@ -20,13 +12,13 @@ struct {
 	__type(value, __u64);
 } test_map_id SEC(".maps");
 
-SEC("test_obj_id_dummy")
-int test_obj_id(struct __sk_buff *skb)
+SEC("raw_tp/sys_enter")
+int test_obj_id(void *ctx)
 {
 	__u32 key = 0;
 	__u64 *value;
 
 	value = bpf_map_lookup_elem(&test_map_id, &key);
 
-	return TC_ACT_OK;
+	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..42403d088abc 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -39,4 +39,10 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
 	return 0;
 }
 
+SEC("fmod_ret/__set_task_comm")
+int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
+{
+	return !tsk;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
new file mode 100644
index 000000000000..8ba9959b036b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+long value = 0;
+long flags = 0;
+
+/* outputs */
+long total = 0;
+long discarded = 0;
+long dropped = 0;
+
+long avail_data = 0;
+long ring_size = 0;
+long cons_pos = 0;
+long prod_pos = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+	struct sample *sample;
+	int zero = 0;
+
+	if (cur_pid != pid)
+		return 0;
+
+	sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+	if (!sample) {
+		__sync_fetch_and_add(&dropped, 1);
+		return 1;
+	}
+
+	sample->pid = pid;
+	bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+	sample->value = value;
+
+	sample->seq = seq++;
+	__sync_fetch_and_add(&total, 1);
+
+	if (sample->seq & 1) {
+		/* copy from reserved sample to a new one... */
+		bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags);
+		/* ...and then discard reserved sample */
+		bpf_ringbuf_discard(sample, flags);
+		__sync_fetch_and_add(&discarded, 1);
+	} else {
+		bpf_ringbuf_submit(sample, flags);
+	}
+
+	avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+	cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+	prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
new file mode 100644
index 000000000000..edf3b6953533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+struct ringbuf_map {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf1 SEC(".maps"),
+  ringbuf2 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__array(values, struct ringbuf_map);
+} ringbuf_arr SEC(".maps") = {
+	.values = {
+		[0] = &ringbuf1,
+		[2] = &ringbuf2,
+	},
+};
+
+/* inputs */
+int pid = 0;
+int target_ring = 0;
+long value = 0;
+
+/* outputs */
+long total = 0;
+long dropped = 0;
+long skipped = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+	struct sample *sample;
+	void *rb;
+	int zero = 0;
+
+	if (cur_pid != pid)
+		return 0;
+
+	rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring);
+	if (!rb) {
+		skipped += 1;
+		return 1;
+	}
+
+	sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0);
+	if (!sample) {
+		dropped += 1;
+		return 1;
+	}
+
+	sample->pid = pid;
+	bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+	sample->value = value;
+
+	sample->seq = total;
+	total += 1;
+
+	bpf_ringbuf_submit(sample, 0);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 8f530843b4da..1ecd987005d2 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -16,6 +16,26 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
+/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
+#define PIN_GLOBAL_NS 2
+
+/* Must match struct bpf_elf_map layout from iproute2 */
+struct {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+	__u32 id;
+	__u32 pinning;
+} server_map SEC("maps") = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.size_key = sizeof(int),
+	.size_value  = sizeof(__u64),
+	.max_elem = 1,
+	.pinning = PIN_GLOBAL_NS,
+};
+
 int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
@@ -72,7 +92,9 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
 	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
+	const int zero = 0;
 	size_t tuple_len;
+	__be16 dport;
 	int ret;
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -83,32 +105,11 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 	if (sk)
 		goto assign;
 
-	if (ipv4) {
-		if (tuple->ipv4.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		ln.ipv4.daddr = bpf_htonl(0x7f000001);
-		ln.ipv4.dport = bpf_htons(1234);
-
-		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
-					BPF_F_CURRENT_NETNS, 0);
-	} else {
-		if (tuple->ipv6.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		/* Upper parts of daddr are already zero. */
-		ln.ipv6.daddr[3] = bpf_htonl(0x1);
-		ln.ipv6.dport = bpf_htons(1234);
-
-		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
-					BPF_F_CURRENT_NETNS, 0);
-	}
+	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+	if (dport != bpf_htons(4321))
+		return TC_ACT_OK;
 
-	/* workaround: We can't do a single socket lookup here, because then
-	 * the compiler will likely spill tuple_len to the stack. This makes it
-	 * lose all bounds information in the verifier, which then rejects the
-	 * call as unsafe.
-	 */
+	sk = bpf_map_lookup_elem(&server_map, &zero);
 	if (!sk)
 		return TC_ACT_SHOT;
 
@@ -123,7 +124,9 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
 	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
+	const int zero = 0;
 	size_t tuple_len;
+	__be16 dport;
 	int ret;
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -137,32 +140,11 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 		bpf_sk_release(sk);
 	}
 
-	if (ipv4) {
-		if (tuple->ipv4.dport != bpf_htons(4321))
-			return TC_ACT_OK;
+	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+	if (dport != bpf_htons(4321))
+		return TC_ACT_OK;
 
-		ln.ipv4.daddr = bpf_htonl(0x7f000001);
-		ln.ipv4.dport = bpf_htons(1234);
-
-		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
-					BPF_F_CURRENT_NETNS, 0);
-	} else {
-		if (tuple->ipv6.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		/* Upper parts of daddr are already zero. */
-		ln.ipv6.daddr[3] = bpf_htonl(0x1);
-		ln.ipv6.dport = bpf_htons(1234);
-
-		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
-					BPF_F_CURRENT_NETNS, 0);
-	}
-
-	/* workaround: We can't do a single socket lookup here, because then
-	 * the compiler will likely spill tuple_len to the stack. This makes it
-	 * lose all bounds information in the verifier, which then rejects the
-	 * call as unsafe.
-	 */
+	sk = bpf_map_lookup_elem(&server_map, &zero);
 	if (!sk)
 		return TC_ACT_SHOT;
 
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index d2b38fa6a5b0..e83d0b48d80c 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -73,6 +73,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
 	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	bpf_printk("sk=%d\n", sk ? 1 : 0);
 	if (sk)
 		bpf_sk_release(sk);
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
new file mode 100644
index 000000000000..bb3fbf1a29e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define TEST_COMM_LEN 16
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, u32);
+} cgroup_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("classifier/test_skb_helpers")
+int test_skb_helpers(struct __sk_buff *skb)
+{
+	struct task_struct *task;
+	char comm[TEST_COMM_LEN];
+	__u32 tpid;
+
+	task = (struct task_struct *)bpf_get_current_task();
+	bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+	bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
new file mode 100644
index 000000000000..45e8fc75a739
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Isovalent, Inc.
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, __u32);
+	__type(value, __u64);
+} socket_storage SEC(".maps");
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+	int verdict = SK_PASS;
+	__u32 pid, tpid;
+	__u64 *sk_stg;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
+	if (!sk_stg)
+		return SK_DROP;
+	*sk_stg = pid;
+	bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+	if (pid != tpid)
+		verdict = SK_DROP;
+	bpf_sk_storage_delete(&socket_storage, (void *)msg->sk);
+	return verdict;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 9b4d3a68a91a..057036ca1111 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
+	__uint(max_entries, 2);
 	__type(key, int);
 	__type(value, int);
 } sock_skb_opts SEC(".maps");
 
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -110,8 +117,6 @@ int bpf_prog2(struct __sk_buff *skb)
 		flags = *f;
 	}
 
-	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
-		   len, flags);
 #ifdef SOCKMAP
 	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
 #else
@@ -120,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
 
 }
 
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+	const int one = 1;
+	int err, *f, ret = SK_PASS;
+	void *data_end;
+	char *c;
+
+	err = bpf_skb_pull_data(skb, 19);
+	if (err)
+		goto tls_out;
+
+	c = (char *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	if (c + 18 < data_end)
+		memcpy(&c[13], "PASS", 4);
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f) {
+		__u64 flags = 0;
+
+		ret = 0;
+		flags = *f;
+#ifdef SOCKMAP
+		return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+		return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+	}
+
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f)
+		ret = SK_DROP;
+tls_out:
+	return ret;
+}
+
 SEC("sockops")
 int bpf_sockmap(struct bpf_sock_ops *skops)
 {
@@ -143,8 +185,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 			err = bpf_sock_hash_update(skops, &sock_map, &ret,
 						   BPF_NOEXIST);
 #endif
-			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
 		}
 		break;
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
@@ -160,8 +200,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 			err = bpf_sock_hash_update(skops, &sock_map, &ret,
 						   BPF_NOEXIST);
 #endif
-			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
 		}
 		break;
 	default:
@@ -199,72 +237,6 @@ int bpf_prog4(struct sk_msg_md *msg)
 }
 
 SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int *start, *end, *start_push, *end_push, *start_pop, *pop;
-	int *bytes, len1, len2 = 0, len3, len4;
-	int err1 = -1, err2 = -1;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		int err;
-
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		int err;
-
-		bpf_printk("sk_msg2: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg2: push_data err %i\n", err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg2: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg2: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-		   len1, err1, err2);
-	return SK_PASS;
-}
-
-SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
 	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
@@ -305,86 +277,7 @@ int bpf_prog6(struct sk_msg_md *msg)
 #endif
 }
 
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
-	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int len1, len2 = 0, len3, len4;
-	int err1 = 0, err2 = 0, key = 0;
-	__u64 flags = 0;
-
-		int err;
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		bpf_printk("sk_msg4: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg4: push_data err %i\n",
-				   err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg4: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg4: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-
-	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-	if (f && *f) {
-		key = 2;
-		flags = *f;
-	}
-	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
-		   len1, flags, err1 ? err1 : err2);
-#ifdef SOCKMAP
-	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
-	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
-	bpf_printk("sk_msg3: err %i\n", err);
-	return err;
-}
-
-SEC("sk_msg5")
+SEC("sk_msg3")
 int bpf_prog8(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
@@ -401,7 +294,7 @@ int bpf_prog8(struct sk_msg_md *msg)
 	}
 	return SK_PASS;
 }
-SEC("sk_msg6")
+SEC("sk_msg4")
 int bpf_prog9(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
@@ -419,7 +312,7 @@ int bpf_prog9(struct sk_msg_md *msg)
 	return SK_PASS;
 }
 
-SEC("sk_msg7")
+SEC("sk_msg5")
 int bpf_prog10(struct sk_msg_md *msg)
 {
 	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
@@ -443,7 +336,6 @@ int bpf_prog10(struct sk_msg_md *msg)
 	pop = bpf_map_lookup_elem(&sock_bytes, &five);
 	if (start_pop && pop)
 		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-	bpf_printk("return sk drop\n");
 	return SK_DROP;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 2d0b0b82a78a..50525235380e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -45,7 +45,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
 	unsigned long tcp_mem[3] = {0, 0, 0};
 	char value[MAX_VALUE_STR_LEN];
 	unsigned char i, off = 0;
-	int ret;
+	volatile int ret;
 
 	if (ctx->write)
 		return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
new file mode 100644
index 000000000000..3d66599eee2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_adjust_tail_grow")
+int _xdp_adjust_tail_grow(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	unsigned int data_len;
+	int offset = 0;
+
+	/* Data length determine test case */
+	data_len = data_end - data;
+
+	if (data_len == 54) { /* sizeof(pkt_v4) */
+		offset = 4096; /* test too large offset */
+	} else if (data_len == 74) { /* sizeof(pkt_v6) */
+		offset = 40;
+	} else if (data_len == 64) {
+		offset = 128;
+	} else if (data_len == 128) {
+		offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+	} else {
+		return XDP_ABORTED; /* No matching test */
+	}
+
+	if (bpf_xdp_adjust_tail(xdp, offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index b7fc85769bdc..22065a9cfb25 100644
--- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Facebook
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -11,15 +11,15 @@
 
 int _version SEC("version") = 1;
 
-SEC("xdp_adjust_tail")
-int _xdp_adjust_tail(struct xdp_md *xdp)
+SEC("xdp_adjust_tail_shrink")
+int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
 	void *data = (void *)(long)xdp->data;
 	int offset = 0;
 
-	if (data_end - data == 54)
-		offset = 256;
+	if (data_end - data == 54) /* sizeof(pkt_v4) */
+		offset = 256; /* shrink too much */
 	else
 		offset = 20;
 	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
new file mode 100644
index 000000000000..e5c0f131c8a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
+ * because of access to egress_ifindex
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_dm_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	unsigned int len = data_end - data;
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
new file mode 100644
index 000000000000..deef0e050863
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&dm_ports, 1, 0);
+}
+
+/* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	unsigned int len = data_end - data;
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+	return XDP_PASS;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
new file mode 100644
index 000000000000..8b36b6640e7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+long hits = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bench_trigger_tp(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+{
+	if (id == __NR_getpgid)
+		__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("kprobe/__x64_sys_getpgid")
+int bench_trigger_kprobe(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_trigger_fentry(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("fmod_ret/__x64_sys_getpgid")
+int bench_trigger_fmodret(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return -22;
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c6766b2cff85..6a12a0e01e07 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1394,23 +1394,25 @@ static void test_map_rdonly(void)
 
 	key = 1;
 	value = 1234;
-	/* Insert key=1 element. */
+	/* Try to insert key=1 element. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
 	       errno == EPERM);
 
-	/* Check that key=2 is not found. */
+	/* Check that key=1 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+
+	close(fd);
 }
 
-static void test_map_wronly(void)
+static void test_map_wronly_hash(void)
 {
 	int fd, key = 0, value = 0;
 
 	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    MAP_SIZE, map_flags | BPF_F_WRONLY);
 	if (fd < 0) {
-		printf("Failed to create map for read only test '%s'!\n",
+		printf("Failed to create map for write only test '%s'!\n",
 		       strerror(errno));
 		exit(1);
 	}
@@ -1420,9 +1422,49 @@ static void test_map_wronly(void)
 	/* Insert key=1 element. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
-	/* Check that key=2 is not found. */
+	/* Check that reading elements and keys from the map is not allowed. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+
+	close(fd);
+}
+
+static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
+{
+	int fd, value = 0;
+
+	assert(map_type == BPF_MAP_TYPE_QUEUE ||
+	       map_type == BPF_MAP_TYPE_STACK);
+	fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
+			    map_flags | BPF_F_WRONLY);
+	/* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create map '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	value = 1234;
+	assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
+
+	/* Peek element should fail */
+	assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+
+	/* Pop element should fail */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+	       errno == EPERM);
+
+	close(fd);
+}
+
+static void test_map_wronly(void)
+{
+	test_map_wronly_hash();
+	test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK);
+	test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE);
 }
 
 static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b521e0a512b6..54fa5fa688ce 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -222,23 +222,6 @@ int test__join_cgroup(const char *path)
 	return fd;
 }
 
-struct ipv4_packet pkt_v4 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
-	.iph.ihl = 5,
-	.iph.protocol = IPPROTO_TCP,
-	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
-};
-
-struct ipv6_packet pkt_v6 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
-	.iph.nexthdr = IPPROTO_TCP,
-	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
-};
-
 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
 {
 	struct bpf_map *map;
@@ -351,25 +334,13 @@ int extract_build_id(char *build_id, size_t size)
 		len = size;
 	memcpy(build_id, line, len);
 	build_id[len] = '\0';
+	free(line);
 	return 0;
 err:
 	fclose(fp);
 	return -1;
 }
 
-void *spin_lock_thread(void *arg)
-{
-	__u32 duration, retval;
-	int err, prog_fd = *(u32 *) arg;
-
-	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
-	pthread_exit(arg);
-}
-
 /* extern declarations for test funcs */
 #define DEFINE_TEST(name) extern void test_##name(void);
 #include <prog_tests/tests.h>
@@ -420,6 +391,18 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 	return 0;
 }
 
+static void free_str_set(const struct str_set *set)
+{
+	int i;
+
+	if (!set)
+		return;
+
+	for (i = 0; i < set->cnt; i++)
+		free((void *)set->strs[i]);
+	free(set->strs);
+}
+
 static int parse_str_list(const char *s, struct str_set *set)
 {
 	char *input, *state = NULL, *next, **tmp, **strs = NULL;
@@ -455,67 +438,6 @@ err:
 	return -ENOMEM;
 }
 
-int parse_num_list(const char *s, struct test_selector *sel)
-{
-	int i, set_len = 0, new_len, num, start = 0, end = -1;
-	bool *set = NULL, *tmp, parsing_end = false;
-	char *next;
-
-	while (s[0]) {
-		errno = 0;
-		num = strtol(s, &next, 10);
-		if (errno)
-			return -errno;
-
-		if (parsing_end)
-			end = num;
-		else
-			start = num;
-
-		if (!parsing_end && *next == '-') {
-			s = next + 1;
-			parsing_end = true;
-			continue;
-		} else if (*next == ',') {
-			parsing_end = false;
-			s = next + 1;
-			end = num;
-		} else if (*next == '\0') {
-			parsing_end = false;
-			s = next;
-			end = num;
-		} else {
-			return -EINVAL;
-		}
-
-		if (start > end)
-			return -EINVAL;
-
-		if (end + 1 > set_len) {
-			new_len = end + 1;
-			tmp = realloc(set, new_len);
-			if (!tmp) {
-				free(set);
-				return -ENOMEM;
-			}
-			for (i = set_len; i < start; i++)
-				tmp[i] = false;
-			set = tmp;
-			set_len = new_len;
-		}
-		for (i = start; i <= end; i++)
-			set[i] = true;
-	}
-
-	if (!set)
-		return -EINVAL;
-
-	sel->num_set = set;
-	sel->num_set_len = set_len;
-
-	return 0;
-}
-
 extern int extra_prog_load_log_flags;
 
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
@@ -529,13 +451,15 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		if (subtest_str) {
 			*subtest_str = '\0';
 			if (parse_num_list(subtest_str + 1,
-					   &env->subtest_selector)) {
+					   &env->subtest_selector.num_set,
+					   &env->subtest_selector.num_set_len)) {
 				fprintf(stderr,
 					"Failed to parse subtest numbers.\n");
 				return -EINVAL;
 			}
 		}
-		if (parse_num_list(arg, &env->test_selector)) {
+		if (parse_num_list(arg, &env->test_selector.num_set,
+				   &env->test_selector.num_set_len)) {
 			fprintf(stderr, "Failed to parse test numbers.\n");
 			return -EINVAL;
 		}
@@ -756,11 +680,11 @@ int main(int argc, char **argv)
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
-	free(env.test_selector.blacklist.strs);
-	free(env.test_selector.whitelist.strs);
+	free_str_set(&env.test_selector.blacklist);
+	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
-	free(env.subtest_selector.blacklist.strs);
-	free(env.subtest_selector.whitelist.strs);
+	free_str_set(&env.subtest_selector.blacklist);
+	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
 
 	return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4aff6b8284b..f4503c926aca 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -37,6 +37,7 @@ typedef __u16 __sum16;
 #include "bpf_util.h"
 #include <bpf/bpf_endian.h>
 #include "trace_helpers.h"
+#include "testing_helpers.h"
 #include "flow_dissector_load.h"
 
 enum verbosity {
@@ -87,23 +88,12 @@ extern void test__skip(void);
 extern void test__fail(void);
 extern int test__join_cgroup(const char *path);
 
-#define MAGIC_BYTES 123
-
-/* ipv4 test vector */
-struct ipv4_packet {
-	struct ethhdr eth;
-	struct iphdr iph;
-	struct tcphdr tcp;
-} __packed;
-extern struct ipv4_packet pkt_v4;
-
-/* ipv6 test vector */
-struct ipv6_packet {
-	struct ethhdr eth;
-	struct ipv6hdr iph;
-	struct tcphdr tcp;
-} __packed;
-extern struct ipv6_packet pkt_v6;
+#define PRINT_FAIL(format...)                                                  \
+	({                                                                     \
+		test__fail();                                                  \
+		fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__);            \
+		fprintf(stdout, ##format);                                     \
+	})
 
 #define _CHECK(condition, tag, duration, format...) ({			\
 	int __ret = !!(condition);					\
@@ -136,10 +126,6 @@ extern struct ipv6_packet pkt_v6;
 #define CHECK_ATTR(condition, tag, format...) \
 	_CHECK(condition, tag, tattr.duration, format)
 
-#define MAGIC_VAL 0x1234
-#define NUM_ITER 100000
-#define VIP_NUM 5
-
 static inline __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
@@ -149,7 +135,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
 int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
 int extract_build_id(char *build_id, size_t size);
-void *spin_lock_thread(void *arg);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 61fd95b89af8..0358814c67dc 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -677,7 +677,7 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		uint8_t u4_addr8[4];
 		uint16_t u4_addr16[2];
 		uint32_t u4_addr32;
-	} ip4;
+	} ip4, port;
 	struct sockaddr_in addr4_rw;
 
 	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
@@ -685,6 +685,8 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		return -1;
 	}
 
+	port.u4_addr32 = htons(SERV4_PORT);
+
 	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
 			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
 		return -1;
@@ -696,49 +698,65 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		/* if (sk.family == AF_INET && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
 
 		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, type)),
 		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
 		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
 
 		/*     1st_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
 
 		/*     2nd_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
 
 		/*     3rd_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
 
 		/*     4th_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
 
 		/*     1st_half_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
 
 		/*     2nd_half_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
 
-		/*     whole_user_ip4 == expected) { */
+		/*     whole_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
 		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
+
+		/*     1st_byte_of_user_port == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
+
+		/*     1st_half_of_user_port == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
+
+		/*     user_port == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
 		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
 
 		/*      user_ip4 = addr4_rw.sin_addr */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 779e11da979c..37695fc8096a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -54,7 +54,7 @@ static void running_handler(int a);
 #define S1_PORT 10000
 #define S2_PORT 10001
 
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
 #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
 #define CG_PATH "/sockmap"
 
@@ -63,14 +63,12 @@ int s1, s2, c1, c2, p1, p2;
 int test_cnt;
 int passed;
 int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
 int prog_fd[11];
 
 int txmsg_pass;
-int txmsg_noisy;
 int txmsg_redir;
-int txmsg_redir_noisy;
 int txmsg_drop;
 int txmsg_apply;
 int txmsg_cork;
@@ -81,7 +79,10 @@ int txmsg_end_push;
 int txmsg_start_pop;
 int txmsg_pop;
 int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
 int ktls;
 int peek_flag;
 
@@ -89,15 +90,13 @@ static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
 	{"cgroup",	required_argument,	NULL, 'c' },
 	{"rate",	required_argument,	NULL, 'r' },
-	{"verbose",	no_argument,		NULL, 'v' },
+	{"verbose",	optional_argument,	NULL, 'v' },
 	{"iov_count",	required_argument,	NULL, 'i' },
 	{"length",	required_argument,	NULL, 'l' },
 	{"test",	required_argument,	NULL, 't' },
 	{"data_test",   no_argument,		NULL, 'd' },
 	{"txmsg",		no_argument,	&txmsg_pass,  1  },
-	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
-	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
 	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
 	{"txmsg_apply",	required_argument,	NULL, 'a'},
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
@@ -108,12 +107,108 @@ static const struct option long_options[] = {
 	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
 	{"txmsg_pop",	     required_argument,	NULL, 'x'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
-	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
+	{"whitelist", required_argument,	NULL, 'n' },
+	{"blacklist", required_argument,	NULL, 'b' },
 	{0, 0, NULL, 0 }
 };
 
+struct test_env {
+	const char *type;
+	const char *subtest;
+	const char *prepend;
+
+	int test_num;
+	int subtest_num;
+
+	int succ_cnt;
+	int fail_cnt;
+	int fail_last;
+};
+
+struct test_env env;
+
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+	int iov_count;
+	int iov_length;
+	int rate;
+	char *map;
+	char *whitelist;
+	char *blacklist;
+	char *prepend;
+};
+
+struct _test {
+	char *title;
+	void (*tester)(int cg_fd, struct sockmap_options *opt);
+};
+
+static void test_start(void)
+{
+	env.subtest_num++;
+}
+
+static void test_fail(void)
+{
+	env.fail_cnt++;
+}
+
+static void test_pass(void)
+{
+	env.succ_cnt++;
+}
+
+static void test_reset(void)
+{
+	txmsg_start = txmsg_end = 0;
+	txmsg_start_pop = txmsg_pop = 0;
+	txmsg_start_push = txmsg_end_push = 0;
+	txmsg_pass = txmsg_drop = txmsg_redir = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_redir_skb = 0;
+	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+}
+
+static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
+{
+	env.type = o->map;
+	env.subtest = t->title;
+	env.prepend = o->prepend;
+	env.test_num++;
+	env.subtest_num = 0;
+	env.fail_last = env.fail_cnt;
+	test_reset();
+	return 0;
+}
+
+static void test_end_subtest(void)
+{
+	int error = env.fail_cnt - env.fail_last;
+	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
+
+	if (!error)
+		test_pass();
+
+	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
+		env.test_num, env.subtest_num,
+		!type ? "sockmap" : "sockhash",
+		env.prepend ? : "",
+		env.subtest, error ? "FAIL" : "OK");
+}
+
+static void test_print_results(void)
+{
+	fprintf(stdout, "Pass: %d Fail: %d\n",
+		env.succ_cnt, env.fail_cnt);
+}
+
 static void usage(char *argv[])
 {
 	int i;
@@ -296,7 +391,7 @@ static int sockmap_init_sockets(int verbose)
 		return errno;
 	}
 
-	if (verbose) {
+	if (verbose > 1) {
 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
 			c1, s1, c2, s2);
@@ -311,17 +406,6 @@ struct msg_stats {
 	struct timespec end;
 };
 
-struct sockmap_options {
-	int verbose;
-	bool base;
-	bool sendpage;
-	bool data_test;
-	bool drop_expected;
-	int iov_count;
-	int iov_length;
-	int rate;
-};
-
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 			     struct msg_stats *s,
 			     struct sockmap_options *opt)
@@ -345,14 +429,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 
 	clock_gettime(CLOCK_MONOTONIC, &s->start);
 	for (i = 0; i < cnt; i++) {
-		int sent = sendfile(fd, fp, NULL, iov_length);
+		int sent;
+
+		errno = 0;
+		sent = sendfile(fd, fp, NULL, iov_length);
 
 		if (!drop && sent < 0) {
-			perror("send loop error");
+			perror("sendpage loop error");
 			fclose(file);
 			return sent;
 		} else if (drop && sent >= 0) {
-			printf("sendpage loop error expected: %i\n", sent);
+			printf("sendpage loop error expected: %i errno %i\n",
+			       sent, errno);
 			fclose(file);
 			return -EIO;
 		}
@@ -418,14 +506,41 @@ unwind_iov:
 
 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
 {
-	int i, j, bytes_cnt = 0;
+	int i, j = 0, bytes_cnt = 0;
 	unsigned char k = 0;
 
 	for (i = 0; i < msg->msg_iovlen; i++) {
 		unsigned char *d = msg->msg_iov[i].iov_base;
 
-		for (j = 0;
-		     j < msg->msg_iov[i].iov_len && size; j++) {
+		/* Special case test for skb ingress + ktls */
+		if (i == 0 && txmsg_ktls_skb) {
+			if (msg->msg_iov[i].iov_len < 4)
+				return -EIO;
+			if (txmsg_ktls_skb_redir) {
+				if (memcmp(&d[13], "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+					return -EIO;
+				}
+				d[13] = 0;
+				d[14] = 1;
+				d[15] = 2;
+				d[16] = 3;
+				j = 13;
+			} else if (txmsg_ktls_skb) {
+				if (memcmp(d, "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+					return -EIO;
+				}
+				d[0] = 0;
+				d[1] = 1;
+				d[2] = 2;
+				d[3] = 3;
+			}
+		}
+
+		for (; j < msg->msg_iov[i].iov_len && size; j++) {
 			if (d[j] != k++) {
 				fprintf(stderr,
 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -464,13 +579,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 	if (tx) {
 		clock_gettime(CLOCK_MONOTONIC, &s->start);
 		for (i = 0; i < cnt; i++) {
-			int sent = sendmsg(fd, &msg, flags);
+			int sent;
+
+			errno = 0;
+			sent = sendmsg(fd, &msg, flags);
 
 			if (!drop && sent < 0) {
-				perror("send loop error");
+				perror("sendmsg loop error");
 				goto out_errno;
 			} else if (drop && sent >= 0) {
-				printf("send loop error expected: %i\n", sent);
+				fprintf(stderr,
+					"sendmsg loop error expected: %i errno %i\n",
+					sent, errno);
 				errno = -EIO;
 				goto out_errno;
 			}
@@ -497,9 +617,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		 * paths.
 		 */
 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
-		txmsg_pop_total = txmsg_pop;
 		if (txmsg_apply)
-			txmsg_pop_total *= (total_bytes / txmsg_apply);
+			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
+		else
+			txmsg_pop_total = txmsg_pop * cnt;
 		total_bytes -= txmsg_pop_total;
 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
 		if (err < 0)
@@ -633,14 +754,18 @@ static int sendmsg_test(struct sockmap_options *opt)
 
 	rxpid = fork();
 	if (rxpid == 0) {
-		if (opt->drop_expected)
-			exit(0);
+		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
+		if (opt->drop_expected || txmsg_ktls_skb_drop)
+			_exit(0);
+
+		if (!iov_buf) /* zero bytes sent case */
+			_exit(0);
 
 		if (opt->sendpage)
 			iov_count = 1;
 		err = msg_loop(rx_fd, iov_count, iov_buf,
 			       cnt, &s, false, opt);
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
 				iov_count, iov_buf, cnt, err);
@@ -648,7 +773,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
 		}
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stdout,
 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -678,7 +803,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
 		}
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stdout,
 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -694,14 +819,14 @@ static int sendmsg_test(struct sockmap_options *opt)
 	if (WIFEXITED(rx_status)) {
 		err = WEXITSTATUS(rx_status);
 		if (err) {
-			fprintf(stderr, "rx thread exited with err %d. ", err);
+			fprintf(stderr, "rx thread exited with err %d.\n", err);
 			goto out;
 		}
 	}
 	if (WIFEXITED(tx_status)) {
 		err = WEXITSTATUS(tx_status);
 		if (err)
-			fprintf(stderr, "tx thread exited with err %d. ", err);
+			fprintf(stderr, "tx thread exited with err %d.\n", err);
 	}
 out:
 	return err;
@@ -783,6 +908,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
 }
 
 enum {
+	SELFTESTS,
 	PING_PONG,
 	SENDMSG,
 	BASE,
@@ -816,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 		return err;
 	}
 
+	/* Attach programs to TLS sockmap */
+	if (txmsg_ktls_skb) {
+		err = bpf_prog_attach(prog_fd[0], map_fd[8],
+					BPF_SK_SKB_STREAM_PARSER, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+				prog_fd[0], map_fd[8], err, strerror(errno));
+			return err;
+		}
+
+		err = bpf_prog_attach(prog_fd[2], map_fd[8],
+				      BPF_SK_SKB_STREAM_VERDICT, 0);
+		if (err) {
+			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+				err, strerror(errno));
+			return err;
+		}
+	}
+
 	/* Attach to cgroups */
-	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
 	if (err) {
 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
 			err, strerror(errno));
@@ -833,19 +979,14 @@ run:
 
 	/* Attach txmsg program to sockmap */
 	if (txmsg_pass)
-		tx_prog_fd = prog_fd[3];
-	else if (txmsg_noisy)
 		tx_prog_fd = prog_fd[4];
 	else if (txmsg_redir)
 		tx_prog_fd = prog_fd[5];
-	else if (txmsg_redir_noisy)
-		tx_prog_fd = prog_fd[6];
-	else if (txmsg_drop)
-		tx_prog_fd = prog_fd[9];
-	/* apply and cork must be last */
 	else if (txmsg_apply)
-		tx_prog_fd = prog_fd[7];
+		tx_prog_fd = prog_fd[6];
 	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[7];
+	else if (txmsg_drop)
 		tx_prog_fd = prog_fd[8];
 	else
 		tx_prog_fd = 0;
@@ -870,7 +1011,7 @@ run:
 			goto out;
 		}
 
-		if (txmsg_redir || txmsg_redir_noisy)
+		if (txmsg_redir)
 			redir_fd = c2;
 		else
 			redir_fd = c1;
@@ -1018,7 +1159,35 @@ run:
 			}
 		}
 
-		if (txmsg_skb) {
+		if (txmsg_ktls_skb) {
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			if (txmsg_ktls_skb_redir) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7],
+							  &i, &ingress, BPF_ANY);
+				if (err) {
+					fprintf(stderr,
+						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+						err, strerror(errno));
+				}
+			}
+
+			if (txmsg_ktls_skb_drop) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+			}
+		}
+
+		if (txmsg_redir_skb) {
 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
 					p2 : p1;
 			int ingress = BPF_F_INGRESS;
@@ -1033,8 +1202,7 @@ run:
 			}
 
 			i = 3;
-			err = bpf_map_update_elem(map_fd[0],
-						  &i, &skb_fd, BPF_ANY);
+			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
 			if (err) {
 				fprintf(stderr,
 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1068,9 +1236,12 @@ run:
 		fprintf(stderr, "unknown test\n");
 out:
 	/* Detatch and zero all the maps */
-	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
 	if (tx_prog_fd >= 0)
 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
 
@@ -1112,12 +1283,8 @@ static void test_options(char *options)
 
 	if (txmsg_pass)
 		strncat(options, "pass,", OPTSTRING);
-	if (txmsg_noisy)
-		strncat(options, "pass_noisy,", OPTSTRING);
 	if (txmsg_redir)
 		strncat(options, "redir,", OPTSTRING);
-	if (txmsg_redir_noisy)
-		strncat(options, "redir_noisy,", OPTSTRING);
 	if (txmsg_drop)
 		strncat(options, "drop,", OPTSTRING);
 	if (txmsg_apply) {
@@ -1143,8 +1310,10 @@ static void test_options(char *options)
 	}
 	if (txmsg_ingress)
 		strncat(options, "ingress,", OPTSTRING);
-	if (txmsg_skb)
-		strncat(options, "skb,", OPTSTRING);
+	if (txmsg_redir_skb)
+		strncat(options, "redir_skb,", OPTSTRING);
+	if (txmsg_ktls_skb)
+		strncat(options, "ktls_skb,", OPTSTRING);
 	if (ktls)
 		strncat(options, "ktls,", OPTSTRING);
 	if (peek_flag)
@@ -1168,416 +1337,317 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
 
 	test_options(options);
 
-	fprintf(stdout,
-		"[TEST %i]: (%i, %i, %i, %s, %s): ",
-		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
-		test_to_str(test), options);
-	fflush(stdout);
+	if (opt->verbose) {
+		fprintf(stdout,
+			" [TEST %i]: (%i, %i, %i, %s, %s): ",
+			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+			test_to_str(test), options);
+		fflush(stdout);
+	}
 	err = run_options(opt, cgrp, test);
-	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	if (opt->verbose)
+		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
 	test_cnt++;
 	!err ? passed++ : failed++;
 	free(options);
 	return err;
 }
 
-static int test_exec(int cgrp, struct sockmap_options *opt)
-{
-	int err = __test_exec(cgrp, SENDMSG, opt);
-
-	if (err)
-		goto out;
-
-	err = __test_exec(cgrp, SENDPAGE, opt);
-out:
-	return err;
-}
-
-static int test_loop(int cgrp)
-{
-	struct sockmap_options opt;
-
-	int err, i, l, r;
-
-	opt.verbose = 0;
-	opt.base = false;
-	opt.sendpage = false;
-	opt.data_test = false;
-	opt.drop_expected = false;
-	opt.iov_count = 0;
-	opt.iov_length = 0;
-	opt.rate = 0;
-
-	r = 1;
-	for (i = 1; i < 100; i += 33) {
-		for (l = 1; l < 100; l += 33) {
-			opt.rate = r;
-			opt.iov_count = i;
-			opt.iov_length = l;
-			err = test_exec(cgrp, &opt);
-			if (err)
-				goto out;
-		}
-	}
-	sched_yield();
-out:
-	return err;
-}
-
-static int test_txmsg(int cgrp)
+static void test_exec(int cgrp, struct sockmap_options *opt)
 {
+	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
 	int err;
 
-	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
-	txmsg_apply = txmsg_cork = 0;
-	txmsg_ingress = txmsg_skb = 0;
-
-	txmsg_pass = 1;
-	err = test_loop(cgrp);
-	txmsg_pass = 0;
-	if (err)
-		goto out;
-
-	txmsg_redir = 1;
-	err = test_loop(cgrp);
-	txmsg_redir = 0;
-	if (err)
-		goto out;
-
-	txmsg_drop = 1;
-	err = test_loop(cgrp);
-	txmsg_drop = 0;
-	if (err)
-		goto out;
-
-	txmsg_redir = 1;
-	txmsg_ingress = 1;
-	err = test_loop(cgrp);
-	txmsg_redir = 0;
-	txmsg_ingress = 0;
-	if (err)
-		goto out;
-out:
-	txmsg_pass = 0;
-	txmsg_redir = 0;
-	txmsg_drop = 0;
-	return err;
+	if (type == 0) {
+		test_start();
+		err = __test_exec(cgrp, SENDMSG, opt);
+		if (err)
+			test_fail();
+	} else {
+		test_start();
+		err = __test_exec(cgrp, SENDPAGE, opt);
+		if (err)
+			test_fail();
+	}
 }
 
-static int test_send(struct sockmap_options *opt, int cgrp)
+static void test_send_one(struct sockmap_options *opt, int cgrp)
 {
-	int err;
-
 	opt->iov_length = 1;
 	opt->iov_count = 1;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
 	opt->iov_length = 1;
 	opt->iov_count = 1024;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
 	opt->iov_length = 1024;
 	opt->iov_count = 1;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
-	opt->iov_length = 1;
+}
+
+static void test_send_many(struct sockmap_options *opt, int cgrp)
+{
+	opt->iov_length = 3;
 	opt->iov_count = 1;
 	opt->rate = 512;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
-
-	opt->iov_length = 256;
-	opt->iov_count = 1024;
-	opt->rate = 2;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
 	opt->rate = 100;
 	opt->iov_count = 1;
 	opt->iov_length = 5;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
-out:
-	sched_yield();
-	return err;
+	test_exec(cgrp, opt);
 }
 
-static int test_mixed(int cgrp)
+static void test_send_large(struct sockmap_options *opt, int cgrp)
 {
-	struct sockmap_options opt = {0};
-	int err;
+	opt->iov_length = 256;
+	opt->iov_count = 1024;
+	opt->rate = 2;
+	test_exec(cgrp, opt);
+}
 
-	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
-	txmsg_apply = txmsg_cork = 0;
-	txmsg_start = txmsg_end = 0;
-	txmsg_start_push = txmsg_end_push = 0;
-	txmsg_start_pop = txmsg_pop = 0;
+static void test_send(struct sockmap_options *opt, int cgrp)
+{
+	test_send_one(opt, cgrp);
+	test_send_many(opt, cgrp);
+	test_send_large(opt, cgrp);
+	sched_yield();
+}
 
+static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
+{
 	/* Test small and large iov_count values with pass/redir/apply/cork */
 	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	test_send(opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 0;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_redir = 1;
+	test_send(opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_drop = 1;
+	test_send(opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1024;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_pass = txmsg_drop = 0;
+	txmsg_ingress = txmsg_redir = 1;
+	test_send(opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 0;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+	bool data = opt->data_test;
+	int k = ktls;
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1024;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	opt->data_test = true;
+	ktls = 1;
 
+	txmsg_pass = txmsg_drop = 0;
+	txmsg_ingress = txmsg_redir = 0;
+	txmsg_ktls_skb = 1;
 	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_cork = 4096;
-	txmsg_apply = 4096;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
 
-	txmsg_pass = 0;
-	txmsg_redir = 1;
-	txmsg_apply = 1;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	/* Using data verification so ensure iov layout is
+	 * expected from test receiver side. e.g. has enough
+	 * bytes to write test code.
+	 */
+	opt->iov_length = 100;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	test_exec(cgrp, opt);
 
-	txmsg_pass = 0;
-	txmsg_redir = 1;
-	txmsg_apply = 0;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_ktls_skb_drop = 1;
+	test_exec(cgrp, opt);
 
-	txmsg_pass = 0;
-	txmsg_redir = 1;
-	txmsg_apply = 1024;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_ktls_skb_drop = 0;
+	txmsg_ktls_skb_redir = 1;
+	test_exec(cgrp, opt);
 
-	txmsg_pass = 0;
-	txmsg_redir = 1;
-	txmsg_apply = 0;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	opt->data_test = data;
+	ktls = k;
+}
+
+
+/* Test cork with hung data. This tests poor usage patterns where
+ * cork can leave data on the ring if user program is buggy and
+ * doesn't flush them somehow. They do take some time however
+ * because they wait for a timeout. Test pass, redir and cork with
+ * apply logic. Use cork size of 4097 with send_large to avoid
+ * aligning cork size with send size.
+ */
+static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_cork = 4097;
+	txmsg_apply = 4097;
+	test_send_large(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
-	txmsg_apply = 1024;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_apply = 0;
+	txmsg_cork = 4097;
+	test_send_large(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
-	txmsg_cork = 4096;
-	txmsg_apply = 4096;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
-out:
-	return err;
+	txmsg_apply = 4097;
+	txmsg_cork = 4097;
+	test_send_large(opt, cgrp);
 }
 
-static int test_start_end(int cgrp)
+static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {0};
-	int err, i;
+	/* Test basic start/end */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send(opt, cgrp);
+
+	/* Test >4k pull */
+	txmsg_start = 4096;
+	txmsg_end = 9182;
+	test_send_large(opt, cgrp);
 
-	/* Test basic start/end with lots of iov_count and iov_lengths */
+	/* Test pull + redirect */
+	txmsg_redir = 0;
 	txmsg_start = 1;
 	txmsg_end = 2;
-	txmsg_start_push = 1;
-	txmsg_end_push = 2;
-	txmsg_start_pop = 1;
-	txmsg_pop = 1;
-	err = test_txmsg(cgrp);
-	if (err)
-		goto out;
+	test_send(opt, cgrp);
 
-	/* Cut a byte of pushed data but leave reamining in place */
+	/* Test pull + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
 	txmsg_start = 1;
 	txmsg_end = 2;
-	txmsg_start_push = 1;
-	txmsg_end_push = 3;
+	test_send_many(opt, cgrp);
+
+	/* Test pull + cork + redirect */
+	txmsg_redir = 1;
+	txmsg_cork = 512;
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send_many(opt, cgrp);
+}
+
+static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
+{
+	/* Test basic pop */
 	txmsg_start_pop = 1;
-	txmsg_pop = 1;
-	err = test_txmsg(cgrp);
-	if (err)
-		goto out;
+	txmsg_pop = 2;
+	test_send_many(opt, cgrp);
 
-	/* Test start/end with cork */
-	opt.rate = 16;
-	opt.iov_count = 1;
-	opt.iov_length = 100;
-	txmsg_cork = 1600;
-
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	for (i = 99; i <= 1600; i += 500) {
-		txmsg_start = 0;
-		txmsg_end = i;
-		txmsg_start_push = 0;
-		txmsg_end_push = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
+	/* Test pop with >4k */
+	txmsg_start_pop = 4096;
+	txmsg_pop = 4096;
+	test_send_large(opt, cgrp);
 
-	/* Test pop data in middle of cork */
-	for (i = 99; i <= 1600; i += 500) {
-		txmsg_start_pop = 10;
-		txmsg_pop = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	/* Test start/end with cork but pull data in middle */
-	for (i = 199; i <= 1600; i += 500) {
-		txmsg_start = 100;
-		txmsg_end = i;
-		txmsg_start_push = 100;
-		txmsg_end_push = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
+	/* Test pop + redirect */
+	txmsg_redir = 1;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(opt, cgrp);
 
-	/* Test start/end with cork pulling last sg entry */
-	txmsg_start = 1500;
-	txmsg_end = 1600;
-	txmsg_start_push = 1500;
-	txmsg_end_push = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	/* Test pop + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(opt, cgrp);
 
-	/* Test pop with cork pulling last sg entry */
-	txmsg_start_pop = 1500;
-	txmsg_pop = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	/* Test start/end pull of single byte in last page */
-	txmsg_start = 1111;
-	txmsg_end = 1112;
-	txmsg_start_push = 1111;
-	txmsg_end_push = 1112;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	/* Test pop + redirect + cork */
+	txmsg_redir = 1;
+	txmsg_cork = 4;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(opt, cgrp);
+}
 
-	/* Test pop of single byte in last page */
-	txmsg_start_pop = 1111;
-	txmsg_pop = 1112;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
+{
+	/* Test basic push */
+	txmsg_start_push = 1;
+	txmsg_end_push = 1;
+	test_send(opt, cgrp);
 
-	/* Test start/end with end < start */
-	txmsg_start = 1111;
-	txmsg_end = 0;
-	txmsg_start_push = 1111;
-	txmsg_end_push = 0;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	/* Test push 4kB >4k */
+	txmsg_start_push = 4096;
+	txmsg_end_push = 4096;
+	test_send_large(opt, cgrp);
 
-	/* Test start/end with end > data */
-	txmsg_start = 0;
-	txmsg_end = 1601;
-	txmsg_start_push = 0;
-	txmsg_end_push = 1601;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	/* Test push + redirect */
+	txmsg_redir = 1;
+	txmsg_start_push = 1;
+	txmsg_end_push = 2;
+	test_send_many(opt, cgrp);
 
-	/* Test start/end with start > data */
-	txmsg_start = 1601;
-	txmsg_end = 1600;
-	txmsg_start_push = 1601;
-	txmsg_end_push = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	/* Test push + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
+	txmsg_start_push = 1;
+	txmsg_end_push = 2;
+	test_send_many(opt, cgrp);
+}
 
-	/* Test pop with start > data */
-	txmsg_start_pop = 1601;
-	txmsg_pop = 1;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_start_push = 1;
+	txmsg_end_push = 10;
+	txmsg_start_pop = 5;
+	txmsg_pop = 4;
+	test_send_large(opt, cgrp);
+}
 
-	/* Test pop with pop range > data */
-	txmsg_start_pop = 1599;
-	txmsg_pop = 10;
-	err = test_exec(cgrp, &opt);
-out:
-	txmsg_start = 0;
-	txmsg_end = 0;
-	sched_yield();
-	return err;
+static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	test_send_one(opt, cgrp);
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	test_send_one(opt, cgrp);
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	test_send_large(opt, cgrp);
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	test_send_large(opt, cgrp);
+}
+
+static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	test_send(opt, cgrp);
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 1;
+	test_send(opt, cgrp);
 }
 
 char *map_names[] = {
@@ -1589,11 +1659,13 @@ char *map_names[] = {
 	"sock_bytes",
 	"sock_redir_flags",
 	"sock_skb_opts",
+	"tls_sock_map",
 };
 
 int prog_attach_type[] = {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_SOCK_OPS,
 	BPF_SK_MSG_VERDICT,
 	BPF_SK_MSG_VERDICT,
@@ -1607,6 +1679,7 @@ int prog_attach_type[] = {
 int prog_type[] = {
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_SK_MSG,
@@ -1662,73 +1735,117 @@ static int populate_progs(char *bpf_file)
 	return 0;
 }
 
-static int __test_suite(int cg_fd, char *bpf_file)
+struct _test test[] = {
+	{"txmsg test passthrough", test_txmsg_pass},
+	{"txmsg test redirect", test_txmsg_redir},
+	{"txmsg test drop", test_txmsg_drop},
+	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+	{"txmsg test skb", test_txmsg_skb},
+	{"txmsg test apply", test_txmsg_apply},
+	{"txmsg test cork", test_txmsg_cork},
+	{"txmsg test hanging corks", test_txmsg_cork_hangs},
+	{"txmsg test push_data", test_txmsg_push},
+	{"txmsg test pull-data", test_txmsg_pull},
+	{"txmsg test pop-data", test_txmsg_pop},
+	{"txmsg test push/pop data", test_txmsg_push_pop},
+};
+
+static int check_whitelist(struct _test *t, struct sockmap_options *opt)
 {
-	int err, cleanup = cg_fd;
+	char *entry, *ptr;
+
+	if (!opt->whitelist)
+		return 0;
+	ptr = strdup(opt->whitelist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0)
+			return 0;
+		entry = strtok(NULL, ",");
+	}
+	return -EINVAL;
+}
 
-	err = populate_progs(bpf_file);
+static int check_blacklist(struct _test *t, struct sockmap_options *opt)
+{
+	char *entry, *ptr;
+
+	if (!opt->blacklist)
+		return -EINVAL;
+	ptr = strdup(opt->blacklist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0)
+			return 0;
+		entry = strtok(NULL, ",");
+	}
+	return -EINVAL;
+}
+
+static int __test_selftests(int cg_fd, struct sockmap_options *opt)
+{
+	int i, err;
+
+	err = populate_progs(opt->map);
 	if (err < 0) {
 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
 		return err;
 	}
 
-	if (cg_fd < 0) {
-		if (setup_cgroup_environment()) {
-			fprintf(stderr, "ERROR: cgroup env failed\n");
-			return -EINVAL;
-		}
+	/* Tests basic commands and APIs */
+	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+		struct _test t = test[i];
 
-		cg_fd = create_and_get_cgroup(CG_PATH);
-		if (cg_fd < 0) {
-			fprintf(stderr,
-				"ERROR: (%i) open cg path failed: %s\n",
-				cg_fd, optarg);
-			return cg_fd;
-		}
+		if (check_whitelist(&t, opt) != 0)
+			continue;
+		if (check_blacklist(&t, opt) == 0)
+			continue;
 
-		if (join_cgroup(CG_PATH)) {
-			fprintf(stderr, "ERROR: failed to join cgroup\n");
-			return -EINVAL;
-		}
+		test_start_subtest(&t, opt);
+		t.tester(cg_fd, opt);
+		test_end_subtest();
 	}
 
-	/* Tests basic commands and APIs with range of iov values */
-	txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
-	err = test_txmsg(cg_fd);
-	if (err)
-		goto out;
+	return err;
+}
 
-	/* Tests interesting combinations of APIs used together */
-	err = test_mixed(cg_fd);
-	if (err)
-		goto out;
+static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
+{
+	opt->map = BPF_SOCKMAP_FILENAME;
+	__test_selftests(cg_fd, opt);
+}
 
-	/* Tests pull_data API using start/end API */
-	err = test_start_end(cg_fd);
-	if (err)
-		goto out;
+static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
+{
+	opt->map = BPF_SOCKHASH_FILENAME;
+	__test_selftests(cg_fd, opt);
+}
 
-out:
-	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
-	if (cleanup < 0) {
-		cleanup_cgroup_environment();
-		close(cg_fd);
-	}
-	return err;
+static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
+{
+	opt->map = BPF_SOCKHASH_FILENAME;
+	opt->prepend = "ktls";
+	ktls = 1;
+	__test_selftests(cg_fd, opt);
+	ktls = 0;
 }
 
-static int test_suite(int cg_fd)
+static int test_selftest(int cg_fd, struct sockmap_options *opt)
 {
-	int err;
 
-	err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
-	if (err)
-		goto out;
-	err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
-out:
-	if (cg_fd > -1)
-		close(cg_fd);
-	return err;
+	test_selftests_sockmap(cg_fd, opt);
+	test_selftests_sockhash(cg_fd, opt);
+	test_selftests_ktls(cg_fd, opt);
+	test_print_results();
+	return 0;
 }
 
 int main(int argc, char **argv)
@@ -1737,12 +1854,10 @@ int main(int argc, char **argv)
 	struct sockmap_options options = {0};
 	int opt, longindex, err, cg_fd = 0;
 	char *bpf_file = BPF_SOCKMAP_FILENAME;
-	int test = PING_PONG;
-
-	if (argc < 2)
-		return test_suite(-1);
+	int test = SELFTESTS;
+	bool cg_created = 0;
 
-	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 's':
@@ -1783,6 +1898,8 @@ int main(int argc, char **argv)
 			break;
 		case 'v':
 			options.verbose = 1;
+			if (optarg)
+				options.verbose = atoi(optarg);
 			break;
 		case 'i':
 			iov_count = atoi(optarg);
@@ -1809,6 +1926,15 @@ int main(int argc, char **argv)
 				return -1;
 			}
 			break;
+		case 'n':
+			options.whitelist = strdup(optarg);
+			if (!options.whitelist)
+				return -ENOMEM;
+			break;
+		case 'b':
+			options.blacklist = strdup(optarg);
+			if (!options.blacklist)
+				return -ENOMEM;
 		case 0:
 			break;
 		case 'h':
@@ -1818,13 +1944,30 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (argc <= 3 && cg_fd)
-		return test_suite(cg_fd);
-
 	if (!cg_fd) {
-		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
-			argv[0]);
-		return -1;
+		if (setup_cgroup_environment()) {
+			fprintf(stderr, "ERROR: cgroup env failed\n");
+			return -EINVAL;
+		}
+
+		cg_fd = create_and_get_cgroup(CG_PATH);
+		if (cg_fd < 0) {
+			fprintf(stderr,
+				"ERROR: (%i) open cg path failed: %s\n",
+				cg_fd, strerror(errno));
+			return cg_fd;
+		}
+
+		if (join_cgroup(CG_PATH)) {
+			fprintf(stderr, "ERROR: failed to join cgroup\n");
+			return -EINVAL;
+		}
+		cg_created = 1;
+	}
+
+	if (test == SELFTESTS) {
+		err = test_selftest(cg_fd, &options);
+		goto out;
 	}
 
 	err = populate_progs(bpf_file);
@@ -1843,6 +1986,13 @@ int main(int argc, char **argv)
 	options.rate = rate;
 
 	err = run_options(&options, cg_fd, test);
+out:
+	if (options.whitelist)
+		free(options.whitelist);
+	if (options.blacklist)
+		free(options.blacklist);
+	if (cg_created)
+		cleanup_cgroup_environment();
 	close(cg_fd);
 	return err;
 }
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 87eaa49609a0..78a6bae56ea6 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	19
+#define MAX_NR_MAPS	20
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -86,6 +86,7 @@ struct bpf_test {
 	int fixup_map_array_small[MAX_FIXUPS];
 	int fixup_sk_storage_map[MAX_FIXUPS];
 	int fixup_map_event_output[MAX_FIXUPS];
+	int fixup_map_reuseport_array[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t insn_processed;
@@ -637,6 +638,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_array_small = test->fixup_map_array_small;
 	int *fixup_sk_storage_map = test->fixup_sk_storage_map;
 	int *fixup_map_event_output = test->fixup_map_event_output;
+	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -806,12 +808,28 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_event_output++;
 		} while (*fixup_map_event_output);
 	}
+	if (*fixup_map_reuseport_array) {
+		map_fds[19] = __create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+					   sizeof(u32), sizeof(u64), 1, 0);
+		do {
+			prog[*fixup_map_reuseport_array].imm = map_fds[19];
+			fixup_map_reuseport_array++;
+		} while (*fixup_map_reuseport_array);
+	}
 }
 
+struct libcap {
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct data[2];
+};
+
 static int set_admin(bool admin)
 {
 	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+	const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+	const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+	struct libcap *cap;
 	int ret = -1;
 
 	caps = cap_get_proc();
@@ -819,11 +837,26 @@ static int set_admin(bool admin)
 		perror("cap_get_proc");
 		return -1;
 	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+	cap = (struct libcap *)caps;
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+		perror("cap_set_flag clear admin");
+		goto out;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
 				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
+		perror("cap_set_flag set_or_clear net");
 		goto out;
 	}
+	/* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
+	 * so update effective bits manually
+	 */
+	if (admin) {
+		cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
+		cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+	} else {
+		cap->data[1].effective &= ~(1 << (38 - 32));
+		cap->data[1].effective &= ~(1 << (39 - 32));
+	}
 	if (cap_set_proc(caps)) {
 		perror("cap_set_proc");
 		goto out;
@@ -943,7 +976,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	attr.insns = prog;
 	attr.insns_cnt = prog_len;
 	attr.license = "GPL";
-	attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
+	if (verbose)
+		attr.log_level = 1;
+	else if (expected_ret == VERBOSE_ACCEPT)
+		attr.log_level = 2;
+	else
+		attr.log_level = 4;
 	attr.prog_flags = pflags;
 
 	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
@@ -1052,9 +1090,11 @@ fail_log:
 
 static bool is_admin(void)
 {
+	cap_flag_value_t net_priv = CAP_CLEAR;
+	bool perfmon_priv = false;
+	bool bpf_priv = false;
+	struct libcap *cap;
 	cap_t caps;
-	cap_flag_value_t sysadmin = CAP_CLEAR;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
 #ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1067,11 +1107,14 @@ static bool is_admin(void)
 		perror("cap_get_proc");
 		return false;
 	}
-	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
-		perror("cap_get_flag");
+	cap = (struct libcap *)caps;
+	bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
+	perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
+	if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+		perror("cap_get_flag NET");
 	if (cap_free(caps))
 		perror("cap_free");
-	return (sysadmin == CAP_SET);
+	return bpf_priv && perfmon_priv && net_priv == CAP_SET;
 }
 
 static void get_unpriv_disabled()
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
new file mode 100644
index 000000000000..0af6337a8962
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdlib.h>
+#include <errno.h>
+#include "testing_helpers.h"
+
+int parse_num_list(const char *s, bool **num_set, int *num_set_len)
+{
+	int i, set_len = 0, new_len, num, start = 0, end = -1;
+	bool *set = NULL, *tmp, parsing_end = false;
+	char *next;
+
+	while (s[0]) {
+		errno = 0;
+		num = strtol(s, &next, 10);
+		if (errno)
+			return -errno;
+
+		if (parsing_end)
+			end = num;
+		else
+			start = num;
+
+		if (!parsing_end && *next == '-') {
+			s = next + 1;
+			parsing_end = true;
+			continue;
+		} else if (*next == ',') {
+			parsing_end = false;
+			s = next + 1;
+			end = num;
+		} else if (*next == '\0') {
+			parsing_end = false;
+			s = next;
+			end = num;
+		} else {
+			return -EINVAL;
+		}
+
+		if (start > end)
+			return -EINVAL;
+
+		if (end + 1 > set_len) {
+			new_len = end + 1;
+			tmp = realloc(set, new_len);
+			if (!tmp) {
+				free(set);
+				return -ENOMEM;
+			}
+			for (i = set_len; i < start; i++)
+				tmp[i] = false;
+			set = tmp;
+			set_len = new_len;
+		}
+		for (i = start; i <= end; i++)
+			set[i] = true;
+	}
+
+	if (!set)
+		return -EINVAL;
+
+	*num_set = set;
+	*num_set_len = set_len;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
new file mode 100644
index 000000000000..923b51762759
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdbool.h>
+
+int parse_num_list(const char *s, bool **set, int *set_len);
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index e0fad1548737..d781bc86e100 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -15,7 +15,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -44,7 +44,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index f3c33e128709..1c4b1939f5a8 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -117,7 +117,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -137,7 +137,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+	.errstr = "R0 unbounded memory access, make sure to bounds check any such access",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index a253a064e6e0..4d6645f2874c 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -20,7 +20,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 },
 {
@@ -146,7 +146,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result = REJECT
 },
 {
@@ -238,7 +238,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	/* r1 = [0x00, 0xff] */
 	BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
@@ -253,10 +253,6 @@
 	 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
 	 */
 	BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
-	/* r1 = 0 or
-	 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
-	 */
-	BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 	/* error on OOB pointer computation */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 	/* exit */
@@ -265,8 +261,10 @@
 	},
 	.fixup_map_hash_8b = { 3 },
 	/* not actually fully unbounded, but the bound is very high */
-	.errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
-	.result = REJECT
+	.errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
+	.result_unpriv = REJECT,
+	.errstr = "value -4294967168 makes map_value pointer be out of bounds",
+	.result = REJECT,
 },
 {
 	"bounds check after truncation of boundary-crossing range (2)",
@@ -276,7 +274,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 	BPF_LD_MAP_FD(BPF_REG_1, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 	/* r1 = [0x00, 0xff] */
 	BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
@@ -293,10 +291,6 @@
 	 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
 	 */
 	BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
-	/* r1 = 0 or
-	 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
-	 */
-	BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 	/* error on OOB pointer computation */
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 	/* exit */
@@ -305,8 +299,10 @@
 	},
 	.fixup_map_hash_8b = { 3 },
 	/* not actually fully unbounded, but the bound is very high */
-	.errstr = "value 72057594021150720 makes map_value pointer be out of bounds",
-	.result = REJECT
+	.errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
+	.result_unpriv = REJECT,
+	.errstr = "value -4294967168 makes map_value pointer be out of bounds",
+	.result = REJECT,
 },
 {
 	"bounds check after wrapping 32-bit addition",
@@ -358,7 +354,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT
 },
 {
@@ -539,3 +535,25 @@
 	},
 	.result = ACCEPT
 },
+{
+	"assigning 32bit bounds to 64bit for wA = 0, wB = wA",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_MOV32_IMM(BPF_REG_9, 0),
+	BPF_MOV32_REG(BPF_REG_2, BPF_REG_9),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_2),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+	BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_8, 1),
+	BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_6, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2d752c4f8d9d..94258c6b5235 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 1,
@@ -105,7 +105,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_hash_8b = { 16 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 },
 {
 	"calls: overlapping caller/callee",
@@ -315,7 +315,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = POINTER_VALUE,
@@ -346,7 +346,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
@@ -397,7 +397,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.fixup_map_hash_48b = { 3 },
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
@@ -1064,7 +1064,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.errstr = "R0 !read_ok",
 	.result = REJECT,
@@ -1977,7 +1977,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
@@ -2003,7 +2003,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
@@ -2028,7 +2028,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
index 84446dfc7c1d..6c214c58e8d4 100644
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ b/tools/testing/selftests/bpf/verifier/const_or.c
@@ -6,7 +6,7 @@
 	BPF_MOV64_IMM(BPF_REG_2, 34),
 	BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -20,7 +20,7 @@
 	BPF_MOV64_IMM(BPF_REG_2, 34),
 	BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid stack type R1 off=-48 access_size=58",
@@ -36,7 +36,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 13),
 	BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -51,7 +51,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 24),
 	BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid stack type R1 off=-48 access_size=58",
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 50a8a63be4ac..5cf361d8eb1c 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -103,7 +103,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -121,7 +121,7 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -137,7 +137,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -152,7 +152,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index b9fb28e8e224..988f46a1a4c7 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -68,7 +68,7 @@
 	},
 	.fixup_map_array_48b = { 1 },
 	.result = REJECT,
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 },
 {
 	"direct map access, write test 7",
@@ -220,7 +220,7 @@
 	},
 	.fixup_map_array_small = { 1 },
 	.result = REJECT,
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 },
 {
 	"direct map access, write test 19",
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
index 130553e19eca..99f8f582c02b 100644
--- a/tools/testing/selftests/bpf/verifier/event_output.c
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -92,3 +92,27 @@
 	.result = ACCEPT,
 	.retval = 1,
 },
+{
+	"perfevent for cgroup dev",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_DEVICE,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"perfevent for cgroup sysctl",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"perfevent for cgroup sockopt",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 67ab12410050..87c4e7900083 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -19,7 +19,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -36,7 +36,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid indirect read from stack off -64+0 size 64",
@@ -55,7 +55,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -84,7 +84,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -112,7 +112,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -132,7 +132,7 @@
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -152,7 +152,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -171,7 +171,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -190,7 +190,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -208,7 +208,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -233,7 +233,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -259,7 +259,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -286,7 +286,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -313,12 +313,12 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 4 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -468,7 +468,7 @@
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "R1 type=inv expected=fp",
@@ -481,7 +481,7 @@
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "R1 type=inv expected=fp",
@@ -495,7 +495,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -513,7 +513,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -534,7 +534,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -554,7 +554,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -580,7 +580,7 @@
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_EXIT_INSN(),
 	},
@@ -607,7 +607,7 @@
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_EXIT_INSN(),
 	},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 7572e403ddb9..1c7882ddfa63 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -10,7 +10,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -29,7 +29,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -67,7 +67,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -87,7 +87,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -109,7 +109,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) -	offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -129,7 +129,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -170,7 +170,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -191,7 +191,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -212,7 +212,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -235,7 +235,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -256,7 +256,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -280,7 +280,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -300,7 +300,7 @@
 		      sizeof(struct test_val) -
 		      offsetof(struct test_val, foo) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -322,7 +322,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -344,7 +344,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -368,7 +368,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -390,7 +390,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -415,7 +415,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -433,7 +433,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -458,7 +458,7 @@
 		      sizeof(struct test_val) -
 		      offsetof(struct test_val, foo) + 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -926,7 +926,7 @@
 	},
 	.fixup_map_hash_16b = { 3, 10 },
 	.result = REJECT,
-	.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+	.errstr = "R2 unbounded memory access, make sure to bounds check any such access",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 02151f8c940f..6dc8003ffc70 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -31,14 +31,14 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	.fixup_map_array_48b = { 1 },
 	.result = VERBOSE_ACCEPT,
 	.errstr =
-	"26: (85) call bpf_probe_read#4\
+	"26: (85) call bpf_probe_read_kernel#113\
 	last_idx 26 first_idx 20\
 	regs=4 stack=0 before 25\
 	regs=4 stack=0 before 24\
@@ -91,7 +91,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -99,7 +99,7 @@
 	.result = VERBOSE_ACCEPT,
 	.flags = BPF_F_TEST_STATE_FREQ,
 	.errstr =
-	"26: (85) call bpf_probe_read#4\
+	"26: (85) call bpf_probe_read_kernel#113\
 	last_idx 26 first_idx 22\
 	regs=4 stack=0 before 25\
 	regs=4 stack=0 before 24\
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index da7a4b37cb98..fc4e301260f6 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -1,34 +1,4 @@
 {
-	"prevent map lookup in sockmap",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_sockmap = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
-	"prevent map lookup in sockhash",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_sockhash = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
 	"prevent map lookup in stack trace",
 	.insns = {
 	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 604b46151736..056e0273bf12 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -821,3 +821,36 @@
 	.result = REJECT,
 	.errstr = "invalid mem access",
 },
+{
+	"reference tracking: branch tracking valid pointer null comparison",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
+{
+	"reference tracking: branch tracking valid pointer value comparison",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "Unreleased reference",
+	.result = REJECT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 9ed192e14f5f..b1aac2641498 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -222,7 +222,7 @@
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -516,3 +516,118 @@
 	.prog_type = BPF_PROG_TYPE_XDP,
 	.result = ACCEPT,
 },
+{
+	"bpf_map_lookup_elem(sockmap, &key)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = REJECT,
+	.errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+	"bpf_map_lookup_elem(sockhash, &key)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockhash = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = REJECT,
+	.errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+	"bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = ACCEPT,
+},
+{
+	"bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockhash = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = ACCEPT,
+},
+{
+	"bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_reuseport_array = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
+{
+	"bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
+{
+	"bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 860d4a71cd83..3ecb70a3d939 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -150,3 +150,22 @@
 	.result_unpriv = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+	"map lookup and null branch prediction",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 10),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 4 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..97ee658e1242 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -50,7 +50,7 @@
 	.fixup_map_array_48b = { 8 },
 	.result = ACCEPT,
 	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 min value is outside of the array range",
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
 	.retval = 1,
 },
 {
@@ -325,7 +325,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result_unpriv = REJECT,
 	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 },
@@ -601,7 +601,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R1 max value is outside of the array range",
+	.errstr = "R1 max value is outside of the allowed memory range",
 	.errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -726,7 +726,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 },
 {
 	"map access: value_ptr -= known scalar, 2",
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
index 26044e397157..b32ba5fec59d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -107,7 +107,7 @@ ingress_flow_action_drop_test()
 
 	RET=0
 
-	devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101
+	devlink_trap_drop_test ingress_flow_action_drop $swp2 101
 
 	log_test "ingress_flow_action_drop"
 
@@ -132,7 +132,7 @@ egress_flow_action_drop_test()
 
 	RET=0
 
-	devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102
+	devlink_trap_drop_test egress_flow_action_drop $swp2 102
 
 	log_test "egress_flow_action_drop"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000000..a37273473c1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,688 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	stp_test
+	lacp_test
+	lldp_test
+	igmp_query_test
+	igmp_v1_report_test
+	igmp_v2_report_test
+	igmp_v3_report_test
+	igmp_v2_leave_test
+	mld_query_test
+	mld_v1_report_test
+	mld_v2_report_test
+	mld_v1_done_test
+	ipv4_dhcp_test
+	ipv6_dhcp_test
+	arp_request_test
+	arp_response_test
+	ipv6_neigh_solicit_test
+	ipv6_neigh_advert_test
+	ipv4_bfd_test
+	ipv6_bfd_test
+	ipv4_ospf_test
+	ipv6_ospf_test
+	ipv4_bgp_test
+	ipv6_bgp_test
+	ipv4_vrrp_test
+	ipv6_vrrp_test
+	ipv4_pim_test
+	ipv6_pim_test
+	uc_loopback_test
+	local_route_test
+	external_route_test
+	ipv6_uc_dip_link_local_scope_test
+	ipv4_router_alert_test
+	ipv6_router_alert_test
+	ipv6_dip_all_nodes_test
+	ipv6_dip_all_routers_test
+	ipv6_router_solicit_test
+	ipv6_router_advert_test
+	ipv6_redirect_test
+	ptp_event_test
+	ptp_general_test
+	flow_action_sample_test
+	flow_action_trap_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+stp_test()
+{
+	devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:02:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:09:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lacp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+		$(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:0E:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:CC:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lldp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+		$(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+	# IGMP (IP Protocol 2) Membership Query (Type 0x11)
+	devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+		$MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+	# IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12)
+	devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+		"igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16)
+	devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+		"igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+	# IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22)
+	devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+		"igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+	devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+		"igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+		-A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"3A:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+mld_query_test()
+{
+	# MLD Multicast Listener Query (Type 130)
+	devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+	# MLD Version 1 Multicast Listener Report (Type 131)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+		"mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+	# MLD Version 2 Multicast Listener Report (Type 143)
+	devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+		"mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+	# MLD Version 1 Multicast Listener Done (Type 132)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+		"mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+	devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+		-t udp sp=68,dp=67 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+		-B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+	devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+		-p 100 -q
+
+	devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+		-p 100 -q
+}
+
+arp_request_test()
+{
+	devlink_trap_stats_test "ARP Request" "arp_request" \
+		$MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+	devlink_trap_stats_test "ARP Response" "arp_response" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+ipv6_neigh_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+		"ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+		-A fe80::1 -B ff02::1:ff00:02 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+		"ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+	devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+	devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+	devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+		-A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+	devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+		-A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+	devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+		-p 100 -q
+}
+
+ipv6_bgp_test()
+{
+	devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+	devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+		-A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+	devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+		-A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+	devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+		-A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+	devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+		-A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+	# Add neighbours to the fake destination IPs, so that the packets are
+	# routed in the device and not trapped due to an unresolved neighbour
+	# exception.
+	ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+	ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+
+	devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 neigh del 2001:db8:1::3 dev $rp1
+	ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+	# Use a fake source IP to prevent the trap from being triggered twice
+	# when the router sends back a port unreachable message.
+	devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+}
+
+external_route_test()
+{
+	# Add a dummy device through which the incoming packets should be
+	# routed.
+	ip link add name dummy10 up type dummy
+	ip address add 203.0.113.1/24 dev dummy10
+	ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+	devlink_trap_stats_test "IPv4 External Route" "external_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 External Route" "external_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+
+	ip -6 address del 2001:db8:10::1/64 dev dummy10
+	ip address del 203.0.113.1/24 dev dummy10
+	ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+	# Add a dummy link-local prefix route to allow the packet to be routed.
+	ip -6 route add fe80:1::/64 dev $rp2
+
+	devlink_trap_stats_test \
+		"IPv6 Unicast Destination IP With Link-Local Scope" \
+		"ipv6_uc_dip_link_local_scope" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B fe80:1::2 -t udp sp=54321,sp=12345 \
+		-p 100 -q
+
+	ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv4#Options
+	p=$(:
+		)"94:"$(			: Option Number
+		)"04:"$(			: Option Length
+		)"00:00:"$(			: Option Data
+		)
+	echo $p
+}
+
+ipv4_router_alert_test()
+{
+	devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.3 \
+		-t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+	# https://tools.ietf.org/html/rfc2711#section-2.1
+	p=$(:
+		)"11:"$(			: Next Header - UDP
+		)"00:"$(			: Hdr Ext Len
+		)"05:02:00:00:00:00:"$(		: Option Data
+		)
+	echo $p
+}
+
+ipv6_router_alert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 \
+		-t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+		"ipv6_dip_all_nodes" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+		"ipv6_dip_all_routers" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Router Solicitation" \
+		"ipv6_router_solicit" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A fe80::1 -B ff02::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Advertisement" \
+		"ipv6_router_advert" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+	devlink_trap_stats_test "IPv6 Redirect Message" \
+		"ipv6_redirect" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Sync (0)
+	devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Announce (b)
+	devlink_trap_stats_test "PTP General Message" "ptp_general" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
+
+flow_action_sample_test()
+{
+	# Install a filter that samples every incoming packet.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1 group 1
+
+	devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+	tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+	# Install a filter that traps a specific flow.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+		skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+	devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+	tc qdisc del dev $rp1 clsact
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index e7aecb065409..a4c2812e9807 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -96,7 +96,6 @@ source_mac_is_multicast_test()
 {
 	local trap_name="source_mac_is_multicast"
 	local smac=01:02:03:04:05:06
-	local group_name="l2_drops"
 	local mz_pid
 
 	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
@@ -107,7 +106,7 @@ source_mac_is_multicast_test()
 
 	RET=0
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	log_test "Source MAC is multicast"
 
@@ -118,7 +117,6 @@ __vlan_tag_mismatch_test()
 {
 	local trap_name="vlan_tag_mismatch"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local opt=$1; shift
 	local mz_pid
 
@@ -132,7 +130,7 @@ __vlan_tag_mismatch_test()
 	$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Add PVID and make sure packets are no longer dropped.
 	bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -140,7 +138,7 @@ __vlan_tag_mismatch_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -179,7 +177,6 @@ ingress_vlan_filter_test()
 {
 	local trap_name="ingress_vlan_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 	local vid=10
 
@@ -193,7 +190,7 @@ ingress_vlan_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Add the VLAN on the bridge port and make sure packets are no longer
 	# dropped.
@@ -202,7 +199,7 @@ ingress_vlan_filter_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -222,7 +219,6 @@ __ingress_stp_filter_test()
 {
 	local trap_name="ingress_spanning_tree_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local state=$1; shift
 	local mz_pid
 	local vid=20
@@ -237,7 +233,7 @@ __ingress_stp_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Change STP state to forwarding and make sure packets are no longer
 	# dropped.
@@ -246,7 +242,7 @@ __ingress_stp_filter_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -292,7 +288,6 @@ port_list_is_empty_uc_test()
 {
 	local trap_name="port_list_is_empty"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 
 	# Disable unicast flooding on both ports, so that packets cannot egress
@@ -308,7 +303,7 @@ port_list_is_empty_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -316,7 +311,7 @@ port_list_is_empty_uc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -335,7 +330,6 @@ port_list_is_empty_mc_test()
 {
 	local trap_name="port_list_is_empty"
 	local dmac=01:00:5e:00:00:01
-	local group_name="l2_drops"
 	local dip=239.0.0.1
 	local mz_pid
 
@@ -354,7 +348,7 @@ port_list_is_empty_mc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -362,7 +356,7 @@ port_list_is_empty_mc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -387,7 +381,6 @@ port_loopback_filter_uc_test()
 {
 	local trap_name="port_loopback_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 
 	# Make sure packets can only egress the input port.
@@ -401,7 +394,7 @@ port_loopback_filter_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -409,7 +402,7 @@ port_loopback_filter_uc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 616f47d86a61..f5abb1ebd392 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -161,7 +161,6 @@ ping_check()
 non_ip_test()
 {
 	local trap_name="non_ip"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -176,7 +175,7 @@ non_ip_test()
 		00:00 de:ad:be:ef" &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Non IP"
 
@@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="uc_dip_over_mc_dmac"
-	local group_name="l3_drops"
 	local dmac=01:02:03:04:05:06
 	local mz_pid
 
@@ -206,7 +204,7 @@ __uc_dip_over_mc_dmac_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Unicast destination IP over multicast destination MAC: $desc"
 
@@ -227,7 +225,6 @@ __sip_is_loopback_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="sip_is_loopback_address"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -242,7 +239,7 @@ __sip_is_loopback_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Source IP is loopback address: $desc"
 
@@ -262,7 +259,6 @@ __dip_is_loopback_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="dip_is_loopback_address"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -277,7 +273,7 @@ __dip_is_loopback_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Destination IP is loopback address: $desc"
 
@@ -298,7 +294,6 @@ __sip_is_mc_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="sip_is_mc"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -313,7 +308,7 @@ __sip_is_mc_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Source IP is multicast: $desc"
 
@@ -329,7 +324,6 @@ sip_is_mc_test()
 ipv4_sip_is_limited_bc_test()
 {
 	local trap_name="ipv4_sip_is_limited_bc"
-	local group_name="l3_drops"
 	local sip=255.255.255.255
 	local mz_pid
 
@@ -345,7 +339,7 @@ ipv4_sip_is_limited_bc_test()
 		-B $h2_ipv4 -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv4 source IP is limited broadcast"
 
@@ -382,7 +376,6 @@ __ipv4_header_corrupted_test()
 	local ihl=$1; shift
 	local checksum=$1; shift
 	local trap_name="ip_header_corrupted"
-	local group_name="l3_drops"
 	local payload
 	local mz_pid
 
@@ -399,7 +392,7 @@ __ipv4_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv4"
 
@@ -429,7 +422,6 @@ __ipv6_header_corrupted_test()
 	local desc=$1; shift
 	local ipver=$1; shift
 	local trap_name="ip_header_corrupted"
-	local group_name="l3_drops"
 	local payload
 	local mz_pid
 
@@ -446,7 +438,7 @@ __ipv6_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv6"
 
@@ -469,7 +461,6 @@ ip_header_corrupted_test()
 ipv6_mc_dip_reserved_scope_test()
 {
 	local trap_name="ipv6_mc_dip_reserved_scope"
-	local group_name="l3_drops"
 	local dip=FF00::
 	local mz_pid
 
@@ -485,7 +476,7 @@ ipv6_mc_dip_reserved_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv6 multicast destination IP reserved scope"
 
@@ -495,7 +486,6 @@ ipv6_mc_dip_reserved_scope_test()
 ipv6_mc_dip_interface_local_scope_test()
 {
 	local trap_name="ipv6_mc_dip_interface_local_scope"
-	local group_name="l3_drops"
 	local dip=FF01::
 	local mz_pid
 
@@ -511,7 +501,7 @@ ipv6_mc_dip_interface_local_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv6 multicast destination IP interface-local scope"
 
@@ -526,7 +516,6 @@ __blackhole_route_test()
 	local dip=$1; shift
 	local ip_proto=${1:-"icmp"}; shift
 	local trap_name="blackhole_route"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -542,7 +531,7 @@ __blackhole_route_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 	log_test "Blackhole route: IPv$flags"
 
 	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
@@ -558,7 +547,6 @@ blackhole_route_test()
 irif_disabled_test()
 {
 	local trap_name="irif_disabled"
-	local group_name="l3_drops"
 	local t0_packets t0_bytes
 	local t1_packets t1_bytes
 	local mz_pid
@@ -613,7 +601,6 @@ irif_disabled_test()
 erif_disabled_test()
 {
 	local trap_name="erif_disabled"
-	local group_name="l3_drops"
 	local t0_packets t0_bytes
 	local t1_packets t1_bytes
 	local mz_pid
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 2bc6df42d597..1fedfc9da434 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -169,7 +169,6 @@ trap_action_check()
 mtu_value_is_too_small_test()
 {
 	local trap_name="mtu_value_is_too_small"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -191,7 +190,7 @@ mtu_value_is_too_small_test()
 		-B 198.51.100.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "Packets were not received to h1"
@@ -208,7 +207,6 @@ __ttl_value_is_too_small_test()
 {
 	local ttl_val=$1; shift
 	local trap_name="ttl_value_is_too_small"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -227,7 +225,7 @@ __ttl_value_is_too_small_test()
 		-b $rp1mac -B 198.51.100.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "Packets were not received to h1"
@@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test()
 	local proto=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="mc_reverse_path_forwarding"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test()
 
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $rp2 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -322,7 +319,6 @@ __reject_route_test()
 	local unreachable=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="reject_route"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -341,7 +337,7 @@ __reject_route_test()
 		-B $dst_ip -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "ICMP packet was not received to h1"
@@ -370,7 +366,6 @@ __host_miss_test()
 	local desc=$1; shift
 	local dip=$1; shift
 	local trap_name="unresolved_neigh"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -405,7 +400,6 @@ __invalid_nexthop_test()
 	local subnet=$1; shift
 	local via_add=$1; shift
 	local trap_name="unresolved_neigh"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -494,7 +488,6 @@ vrf_without_routes_destroy()
 ipv4_lpm_miss_test()
 {
 	local trap_name="ipv4_lpm_miss"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -511,7 +504,7 @@ ipv4_lpm_miss_test()
 		-B 203.0.113.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	log_test "LPM miss: IPv4"
 
@@ -522,7 +515,6 @@ ipv4_lpm_miss_test()
 ipv6_lpm_miss_test()
 {
 	local trap_name="ipv6_lpm_miss"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -539,7 +531,7 @@ ipv6_lpm_miss_test()
 		-B 2001:db8::1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	log_test "LPM miss: IPv6"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 039629bb92a3..8817851da7a9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -140,7 +140,6 @@ ecn_payload_get()
 ecn_decap_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local ecn_desc=$1; shift
 	local outer_tos=$1; shift
@@ -161,7 +160,7 @@ ecn_decap_test()
 
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -200,7 +199,6 @@ ipip_payload_get()
 no_matching_tunnel_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local sip=$1; shift
 	local mz_pid
@@ -218,7 +216,7 @@ no_matching_tunnel_test()
 		-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index e11a416323cf..10e0f3dbc930 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -159,7 +159,6 @@ ecn_payload_get()
 ecn_decap_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local ecn_desc=$1; shift
 	local outer_tos=$1; shift
@@ -177,7 +176,7 @@ ecn_decap_test()
 		-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -228,7 +227,6 @@ short_payload_get()
 corrupted_packet_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local payload_get=$1; shift
 	local mz_pid
@@ -246,7 +244,7 @@ corrupted_packet_test()
 		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -297,7 +295,6 @@ mc_smac_payload_get()
 overlay_smac_is_mc_test()
 {
 	local trap_name="overlay_smac_is_mc"
-	local group_name="tunnel_drops"
 	local mz_pid
 
 	RET=0
@@ -314,7 +311,7 @@ overlay_smac_is_mc_test()
 		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp1 101
+	devlink_trap_drop_test $trap_name $swp1 101
 
 	log_test "Overlay source MAC is multicast"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 24dd8ed48580..b025daea062d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -300,7 +300,7 @@ test_uc_aware()
 	local i
 
 	for ((i = 0; i < attempts; ++i)); do
-		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
 			((passes++))
 		fi
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 58f3a05f08af..7d9e73a43a49 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -15,7 +15,7 @@ source mlxsw_lib.sh
 SB_POOL_ING=0
 SB_POOL_EGR_CPU=10
 
-SB_ITC_CPU_IP=3
+SB_ITC_CPU_IP=2
 SB_ITC_CPU_ARP=2
 SB_ITC=0
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 68c80d0ec1ec..9241250c5921 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -7,6 +7,10 @@ ALL_TESTS="
 	shared_block_drop_test
 	egress_redirect_test
 	multi_mirror_test
+	matchall_sample_egress_test
+	matchall_mirror_behind_flower_ingress_test
+	matchall_sample_behind_flower_ingress_test
+	matchall_mirror_behind_flower_egress_test
 "
 NUM_NETIFS=2
 
@@ -155,6 +159,134 @@ multi_mirror_test()
 	log_test "multi mirror"
 }
 
+matchall_sample_egress_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have matchall with sample action
+	# bound on egress
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_err $? "Failed to add rule with sample action on ingress"
+
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_fail $? "Incorrect success to add rule with sample action on egress"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall sample egress"
+}
+
+matchall_behind_flower_ingress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On ingress, all matchall-mirror and matchall-sample
+	# rules have to be in front of the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+	tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1"
+}
+
+matchall_behind_flower_egress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On egress, all matchall-mirror rules have to be behind the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+	matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
 setup_prepare()
 {
 	swp1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 9f9741444549..de4b32fc4223 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -146,11 +146,39 @@ regions_test()
 
 	check_region_snapshot_count dummy post-first-request 3
 
+	devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null
+	check_err $? "Failed to dump snapshot with id 25"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (1 byte)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (128 bytes)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (oversized)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1
+	check_fail $? "Bad read of snapshot with id 25 did not fail"
+
 	devlink region del $DL_HANDLE/dummy snapshot 25
 	check_err $? "Failed to delete snapshot with id 25"
 
 	check_region_snapshot_count dummy post-second-delete 2
 
+	sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]')
+	check_err $? "Failed to create a new snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 3
+
+	devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null
+	check_err $? "Failed to dump a snapshot with id allocated by the kernel"
+
+	devlink region del $DL_HANDLE/dummy snapshot $sid
+	check_err $? "Failed to delete snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 2
+
 	log_test "regions test"
 }
 
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index dbd1e014ba17..da49ad2761b5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -264,6 +264,8 @@ trap_policer_test()
 	local packets_t0
 	local packets_t1
 
+	RET=0
+
 	if [ $(devlink_trap_policers_num_get) -eq 0 ]; then
 		check_err 1 "Failed to dump policers"
 	fi
@@ -328,6 +330,8 @@ trap_group_check_policer()
 
 trap_policer_bind_test()
 {
+	RET=0
+
 	devlink trap group set $DEVLINK_DEV group l2_drops policer 1
 	check_err $? "Failed to bind a valid policer"
 	if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index c078ece12ff0..94b02a18f230 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -9,3 +9,4 @@ execveat.ephemeral
 execveat.denatured
 /recursion-depth
 xxxxxxxx*
+pipe
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 33339e31e365..4453b8f8def3 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,8 +3,9 @@ CFLAGS = -Wall
 CFLAGS += -Wno-nonnull
 CFLAGS += -D_GNU_SOURCE
 
+TEST_PROGS := binfmt_script
 TEST_GEN_PROGS := execveat
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
 # Makefile is a run-time dependency, since it's accessed by the execveat test
 TEST_FILES := Makefile
 
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script
new file mode 100755
index 000000000000..05f94a741c7a
--- /dev/null
+++ b/tools/testing/selftests/exec/binfmt_script
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that truncation of bprm->buf doesn't cause unexpected execs paths, along
+# with various other pathological cases.
+import os, subprocess
+
+# Relevant commits
+#
+# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
+# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
+
+# BINPRM_BUF_SIZE
+SIZE=256
+
+NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
+
+test_num=0
+
+code='''#!/usr/bin/perl
+print "Executed interpreter! Args:\n";
+print "0 : '$0'\n";
+$counter = 1;
+foreach my $a (@ARGV) {
+    print "$counter : '$a'\n";
+    $counter++;
+}
+'''
+
+##
+# test - produce a binfmt_script hashbang line for testing
+#
+# @size:     bytes for bprm->buf line, including hashbang but not newline
+# @good:     whether this script is expected to execute correctly
+# @hashbang: the special 2 bytes for running binfmt_script
+# @leading:  any leading whitespace before the executable path
+# @root:     start of executable pathname
+# @target:   end of executable pathname
+# @arg:      bytes following the executable pathname
+# @fill:     character to fill between @root and @target to reach @size bytes
+# @newline:  character to use as newline, not counted towards @size
+# ...
+def test(name, size, good=True, leading="", root="./", target="/perl",
+                     fill="A", arg="", newline="\n", hashbang="#!"):
+    global test_num, tests, NAME_MAX
+    test_num += 1
+    if test_num > tests:
+        raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
+                         % (test_num, tests))
+
+    middle = ""
+    remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
+    # The middle of the pathname must not exceed NAME_MAX
+    while remaining >= NAME_MAX:
+        middle += fill * (NAME_MAX - 1)
+        middle += '/'
+        remaining -= NAME_MAX
+    middle += fill * remaining
+
+    dirpath = root + middle
+    binary = dirpath + target
+    if len(target):
+        os.makedirs(dirpath, mode=0o755, exist_ok=True)
+        open(binary, "w").write(code)
+        os.chmod(binary, 0o755)
+
+    buf=hashbang + leading + root + middle + target + arg + newline
+    if len(newline) > 0:
+        buf += 'echo this is not really perl\n'
+
+    script = "binfmt_script-%s" % (name)
+    open(script, "w").write(buf)
+    os.chmod(script, 0o755)
+
+    proc = subprocess.Popen(["./%s" % (script)], shell=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    stdout = proc.communicate()[0]
+
+    if proc.returncode == 0 and b'Executed interpreter' in stdout:
+        if good:
+            print("ok %d - binfmt_script %s (successful good exec)"
+                  % (test_num, name))
+        else:
+            print("not ok %d - binfmt_script %s succeeded when it should have failed"
+                  % (test_num, name))
+    else:
+        if good:
+            print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
+                  % (test_num, name, proc.returncode))
+        else:
+            print("ok %d - binfmt_script %s (correctly failed bad exec)"
+                  % (test_num, name))
+
+    # Clean up crazy binaries
+    os.unlink(script)
+    if len(target):
+        elements = binary.split('/')
+        os.unlink(binary)
+        elements.pop()
+        while len(elements) > 1:
+            os.rmdir("/".join(elements))
+            elements.pop()
+
+tests=27
+print("TAP version 1.3")
+print("1..%d" % (tests))
+
+### FAIL (8 tests)
+
+# Entire path is well past the BINFMT_BUF_SIZE.
+test(name="too-big",        size=SIZE+80, good=False)
+# Path is right at max size, making it impossible to tell if it was truncated.
+test(name="exact",          size=SIZE,    good=False)
+# Same as above, but with leading whitespace.
+test(name="exact-space",    size=SIZE,    good=False, leading=" ")
+# Huge buffer of only whitespace.
+test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
+                                              fill=" ", target="")
+# A good path, but it gets truncated due to leading whitespace.
+test(name="truncated",      size=SIZE+17, good=False, leading=" " * 19)
+# Entirely empty except for #!
+test(name="empty",          size=2,       good=False, root="",
+                                          fill="", target="", newline="")
+# Within size, but entirely spaces
+test(name="spaces",         size=SIZE-1,  good=False, root="", fill=" ",
+                                          target="", newline="")
+# Newline before binary.
+test(name="newline-prefix", size=SIZE-1,  good=False, leading="\n",
+                                          root="", fill=" ", target="")
+
+### ok (19 tests)
+
+# The original test case that was broken by commit:
+# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
+test(name="test.pl",        size=439, leading=" ",
+     root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
+     arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
+# One byte under size, leaving newline visible.
+test(name="one-under",           size=SIZE-1)
+# Two bytes under size, leaving newline visible.
+test(name="two-under",           size=SIZE-2)
+# Exact size, but trailing whitespace visible instead of newline
+test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
+# Exact size, but trailing space and first arg char visible instead of newline.
+test(name="exact-trunc-arg",     size=SIZE, arg=" f")
+# One bute under, with confirmed non-truncated arg since newline now visible.
+test(name="one-under-full-arg",  size=SIZE-1, arg=" f")
+# Short read buffer by one byte.
+test(name="one-under-no-nl",     size=SIZE-1, newline="")
+# Short read buffer by half buffer size.
+test(name="half-under-no-nl",    size=int(SIZE/2), newline="")
+# One byte under with whitespace arg. leaving wenline visible.
+test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
+# One byte under with whitespace leading. leaving wenline visible.
+test(name="one-under-leading",   size=SIZE-1, leading=" ")
+# One byte under with whitespace leading and as arg. leaving newline visible.
+test(name="one-under-leading-trunc-arg",  size=SIZE-1, leading=" ", arg=" ")
+# Same as above, but with 2 bytes under
+test(name="two-under-no-nl",     size=SIZE-2, newline="")
+test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
+test(name="two-under-leading",   size=SIZE-2, leading=" ")
+test(name="two-under-leading-trunc-arg",   size=SIZE-2, leading=" ", arg=" ")
+# Same as above, but with buffer half filled
+test(name="two-under-no-nl",     size=int(SIZE/2), newline="")
+test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
+test(name="two-under-leading",   size=int(SIZE/2), leading=" ")
+test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+
+if test_num != tests:
+    raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
+                     % (test_num, tests))
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index cbb6efbdb786..67bf7254a48f 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -5,7 +5,9 @@
  * Selftests for execveat(2).
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE  /* to get O_PATH, AT_EMPTY_PATH */
+#endif
 #include <sys/sendfile.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
@@ -311,6 +313,10 @@ static int run_tests(void)
 	fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
 				    AT_SYMLINK_NOFOLLOW, ELOOP);
 
+	/*  Non-regular file failure */
+	fail += check_execveat_fail(dot_dfd, "pipe", 0, EACCES);
+	unlink("pipe");
+
 	/* Shell script wrapping executable file: */
 	/*   dfd + path */
 	fail += check_execveat(subdir_dfd, "../script", 0);
@@ -384,6 +390,8 @@ static void prerequisites(void)
 	fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
 	write(fd, script, strlen(script));
 	close(fd);
+
+	mkfifo("pipe", 0755);
 }
 
 int main(int argc, char **argv)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
index 021c03fd885d..23465823532b 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
@@ -14,6 +14,8 @@ if [ ! -f set_event ]; then
     exit_unsupported
 fi
 
+[ -f error_log ] || exit_unsupported
+
 ftrace_errlog_check 'event filter parse error' '((sig >= 10 && sig < 15) || dsig ^== 17) && comm != bash' 'events/signal/signal_generate/filter'
 
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 61a3c7e2634d..697c77ef2e2b 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -119,12 +119,14 @@ yield() {
     ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
+# Since probe event command may include backslash, explicitly use printf "%s"
+# to NOT interpret it.
 ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
-    pos=$(echo -n "${2%^*}" | wc -c) # error position
-    command=$(echo "$2" | tr -d ^)
+    pos=$(printf "%s" "${2%^*}" | wc -c) # error position
+    command=$(printf "%s" "$2" | tr -d ^)
     echo "Test command: $command"
     echo > error_log
-    (! echo "$command" >> "$3" ) 2> /dev/null
+    (! printf "%s" "$command" >> "$3" ) 2> /dev/null
     grep "$1: error:" -A 3 error_log
     N=$(tail -n 1 error_log | wc -c)
     # "  Command: " and "^\n" => 13
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index ef1e9bafb098..eb0f4ab4e070 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -91,7 +91,9 @@ esac
 if grep -q "Create/append/" README && grep -q "imm-value" README; then
 echo 'p:kprobes/testevent _do_fork' > kprobe_events
 check_error '^r:kprobes/testevent do_exit'	# DIFF_PROBE_TYPE
-echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
 check_error 'p:kprobes/testevent _do_fork ^bcd=\1'	# DIFF_ARG_TYPE
 check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8'	# DIFF_ARG_TYPE
 check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"'	# DIFF_ARG_TYPE
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index 8b2b6088540d..4a974bc03385 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -49,6 +49,11 @@ main()
 	# directory
 	./kselftest_install.sh "$install_dir"
 	(cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name)
+
+	# Don't put the message at the actual end as people may be parsing the
+	# "archive created" line in their scripts.
+	echo -e "\nConsider using 'make gen_tar' instead of this script\n"
+
 	echo "Kselftest archive kselftest${ext} created!"
 
 	# clean up top-level install work directory
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 2bb8c81fc0b4..c9f03ef93338 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -168,9 +168,17 @@
 
 #define __TEST_IMPL(test_name, _signal) \
 	static void test_name(struct __test_metadata *_metadata); \
+	static inline void wrapper_##test_name( \
+		struct __test_metadata *_metadata, \
+		struct __fixture_variant_metadata *variant) \
+	{ \
+		test_name(_metadata); \
+	} \
 	static struct __test_metadata _##test_name##_object = \
-		{ .name = "global." #test_name, \
-		  .fn = &test_name, .termsig = _signal, \
+		{ .name = #test_name, \
+		  .fn = &wrapper_##test_name, \
+		  .fixture = &_fixture_global, \
+		  .termsig = _signal, \
 		  .timeout = TEST_TIMEOUT_DEFAULT, }; \
 	static void __attribute__((constructor)) _register_##test_name(void) \
 	{ \
@@ -212,10 +220,13 @@
  * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
  */
 #define FIXTURE(fixture_name) \
+	FIXTURE_VARIANT(fixture_name); \
+	static struct __fixture_metadata _##fixture_name##_fixture_object = \
+		{ .name =  #fixture_name, }; \
 	static void __attribute__((constructor)) \
 	_register_##fixture_name##_data(void) \
 	{ \
-		__fixture_count++; \
+		__register_fixture(&_##fixture_name##_fixture_object); \
 	} \
 	FIXTURE_DATA(fixture_name)
 
@@ -241,7 +252,10 @@
 #define FIXTURE_SETUP(fixture_name) \
 	void fixture_name##_setup( \
 		struct __test_metadata __attribute__((unused)) *_metadata, \
-		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+		const FIXTURE_VARIANT(fixture_name) \
+			__attribute__((unused)) *variant)
+
 /**
  * FIXTURE_TEARDOWN(fixture_name)
  * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
@@ -264,6 +278,59 @@
 		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
 
 /**
+ * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * to declare fixture variant
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_VARIANT(datatype name) {
+ *       type property1;
+ *       ...
+ *     };
+ *
+ * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
+ * as *variant*. Variants allow the same tests to be run with different
+ * arguments.
+ */
+#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
+
+/**
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * variant to setup and register the data
+ *
+ * @fixture_name: fixture name
+ * @variant_name: name of the parameter set
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_ADD(datatype name) {
+ *       .property1 = val1;
+ *       ...
+ *     };
+ *
+ * Defines a variant of the test fixture, provided to FIXTURE_SETUP() and
+ * TEST_F() as *variant*. Tests of each fixture will be run once for each
+ * variant.
+ */
+#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
+	extern FIXTURE_VARIANT(fixture_name) \
+		_##fixture_name##_##variant_name##_variant; \
+	static struct __fixture_variant_metadata \
+		_##fixture_name##_##variant_name##_object = \
+		{ .name = #variant_name, \
+		  .data = &_##fixture_name##_##variant_name##_variant}; \
+	static void __attribute__((constructor)) \
+		_register_##fixture_name##_##variant_name(void) \
+	{ \
+		__register_fixture_variant(&_##fixture_name##_fixture_object, \
+			&_##fixture_name##_##variant_name##_object);	\
+	} \
+	FIXTURE_VARIANT(fixture_name) \
+		_##fixture_name##_##variant_name##_variant =
+
+/**
  * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
  * fixture-based test cases
  *
@@ -293,24 +360,27 @@
 #define __TEST_F_IMPL(fixture_name, test_name, signal, tmout) \
 	static void fixture_name##_##test_name( \
 		struct __test_metadata *_metadata, \
-		FIXTURE_DATA(fixture_name) *self); \
+		FIXTURE_DATA(fixture_name) *self, \
+		const FIXTURE_VARIANT(fixture_name) *variant); \
 	static inline void wrapper_##fixture_name##_##test_name( \
-		struct __test_metadata *_metadata) \
+		struct __test_metadata *_metadata, \
+		struct __fixture_variant_metadata *variant) \
 	{ \
 		/* fixture data is alloced, setup, and torn down per call. */ \
 		FIXTURE_DATA(fixture_name) self; \
 		memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
-		fixture_name##_setup(_metadata, &self); \
+		fixture_name##_setup(_metadata, &self, variant->data); \
 		/* Let setup failure terminate early. */ \
 		if (!_metadata->passed) \
 			return; \
-		fixture_name##_##test_name(_metadata, &self); \
+		fixture_name##_##test_name(_metadata, &self, variant->data); \
 		fixture_name##_teardown(_metadata, &self); \
 	} \
 	static struct __test_metadata \
 		      _##fixture_name##_##test_name##_object = { \
-		.name = #fixture_name "." #test_name, \
+		.name = #test_name, \
 		.fn = &wrapper_##fixture_name##_##test_name, \
+		.fixture = &_##fixture_name##_fixture_object, \
 		.termsig = signal, \
 		.timeout = tmout, \
 	 }; \
@@ -321,7 +391,9 @@
 	} \
 	static void fixture_name##_##test_name( \
 		struct __test_metadata __attribute__((unused)) *_metadata, \
-		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+		const FIXTURE_VARIANT(fixture_name) \
+			__attribute__((unused)) *variant)
 
 /**
  * TEST_HARNESS_MAIN - Simple wrapper to run the test harness
@@ -631,11 +703,74 @@
 	} \
 } while (0); OPTIONAL_HANDLER(_assert)
 
+/* List helpers */
+#define __LIST_APPEND(head, item) \
+{ \
+	/* Circular linked list where only prev is circular. */ \
+	if (head == NULL) { \
+		head = item; \
+		item->next = NULL; \
+		item->prev = item; \
+		return;	\
+	} \
+	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+		item->next = NULL; \
+		item->prev = head->prev; \
+		item->prev->next = item; \
+		head->prev = item; \
+	} else { \
+		item->next = head; \
+		item->next->prev = item; \
+		item->prev = item; \
+		head = item; \
+	} \
+}
+
+struct __test_metadata;
+struct __fixture_variant_metadata;
+
+/* Contains all the information about a fixture. */
+struct __fixture_metadata {
+	const char *name;
+	struct __test_metadata *tests;
+	struct __fixture_variant_metadata *variant;
+	struct __fixture_metadata *prev, *next;
+} _fixture_global __attribute__((unused)) = {
+	.name = "global",
+	.prev = &_fixture_global,
+};
+
+static struct __fixture_metadata *__fixture_list = &_fixture_global;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD   1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+static inline void __register_fixture(struct __fixture_metadata *f)
+{
+	__LIST_APPEND(__fixture_list, f);
+}
+
+struct __fixture_variant_metadata {
+	const char *name;
+	const void *data;
+	struct __fixture_variant_metadata *prev, *next;
+};
+
+static inline void
+__register_fixture_variant(struct __fixture_metadata *f,
+			   struct __fixture_variant_metadata *variant)
+{
+	__LIST_APPEND(f->variant, variant);
+}
+
 /* Contains all the information for test execution and status checking. */
 struct __test_metadata {
 	const char *name;
-	void (*fn)(struct __test_metadata *);
+	void (*fn)(struct __test_metadata *,
+		   struct __fixture_variant_metadata *);
 	pid_t pid;	/* pid of test when being run */
+	struct __fixture_metadata *fixture;
 	int termsig;
 	int passed;
 	int trigger; /* extra handler after the evaluation */
@@ -646,15 +781,6 @@ struct __test_metadata {
 	struct __test_metadata *prev, *next;
 };
 
-/* Storage for the (global) tests to be run. */
-static struct __test_metadata *__test_list;
-static unsigned int __test_count;
-static unsigned int __fixture_count;
-static int __constructor_order;
-
-#define _CONSTRUCTOR_ORDER_FORWARD   1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
-
 /*
  * Since constructors are called in reverse order, reverse the test
  * list so tests are run in source declaration order.
@@ -666,25 +792,7 @@ static int __constructor_order;
  */
 static inline void __register_test(struct __test_metadata *t)
 {
-	__test_count++;
-	/* Circular linked list where only prev is circular. */
-	if (__test_list == NULL) {
-		__test_list = t;
-		t->next = NULL;
-		t->prev = t;
-		return;
-	}
-	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
-		t->next = NULL;
-		t->prev = __test_list->prev;
-		t->prev->next = t;
-		__test_list->prev = t;
-	} else {
-		t->next = __test_list;
-		t->next->prev = t;
-		t->prev = t;
-		__test_list = t;
-	}
+	__LIST_APPEND(t->fixture->tests, t);
 }
 
 static inline int __bail(int for_realz, bool no_print, __u8 step)
@@ -790,43 +898,67 @@ void __wait_for_test(struct __test_metadata *t)
 	}
 }
 
-void __run_test(struct __test_metadata *t)
+void __run_test(struct __fixture_metadata *f,
+		struct __fixture_variant_metadata *variant,
+		struct __test_metadata *t)
 {
+	/* reset test struct */
 	t->passed = 1;
 	t->trigger = 0;
-	printf("[ RUN      ] %s\n", t->name);
+	t->step = 0;
+	t->no_print = 0;
+
+	printf("[ RUN      ] %s%s%s.%s\n",
+	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
 	t->pid = fork();
 	if (t->pid < 0) {
 		printf("ERROR SPAWNING TEST CHILD\n");
 		t->passed = 0;
 	} else if (t->pid == 0) {
-		t->fn(t);
+		t->fn(t, variant);
 		/* return the step that failed or 0 */
 		_exit(t->passed ? 0 : t->step);
 	} else {
 		__wait_for_test(t);
 	}
-	printf("[     %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+	printf("[     %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"),
+	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
 }
 
 static int test_harness_run(int __attribute__((unused)) argc,
 			    char __attribute__((unused)) **argv)
 {
+	struct __fixture_variant_metadata no_variant = { .name = "", };
+	struct __fixture_variant_metadata *v;
+	struct __fixture_metadata *f;
 	struct __test_metadata *t;
 	int ret = 0;
+	unsigned int case_count = 0, test_count = 0;
 	unsigned int count = 0;
 	unsigned int pass_count = 0;
 
+	for (f = __fixture_list; f; f = f->next) {
+		for (v = f->variant ?: &no_variant; v; v = v->next) {
+			case_count++;
+			for (t = f->tests; t; t = t->next)
+				test_count++;
+		}
+	}
+
 	/* TODO(wad) add optional arguments similar to gtest. */
 	printf("[==========] Running %u tests from %u test cases.\n",
-	       __test_count, __fixture_count + 1);
-	for (t = __test_list; t; t = t->next) {
-		count++;
-		__run_test(t);
-		if (t->passed)
-			pass_count++;
-		else
-			ret = 1;
+	       test_count, case_count);
+	for (f = __fixture_list; f; f = f->next) {
+		for (v = f->variant ?: &no_variant; v; v = v->next) {
+			for (t = f->tests; t; t = t->next) {
+				count++;
+				__run_test(f, v, t);
+				if (t->passed)
+					pass_count++;
+				else
+					ret = 1;
+			}
+		}
 	}
 	printf("[==========] %u / %u tests passed.\n", pass_count, count);
 	printf("[  %s  ]\n", (ret ? "FAILED" : "PASSED"));
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index a9b2b48947ff..f159718f90c0 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -7,10 +7,10 @@
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
-/x86_64/set_memory_region_test
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
+/x86_64/vmx_preemption_timer_test
 /x86_64/svm_vmcall_test
 /x86_64/sync_regs_test
 /x86_64/vmx_close_while_nested_test
@@ -22,4 +22,5 @@
 /demand_paging_test
 /dirty_log_test
 /kvm_create_max_vcpus
+/set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 42f4f49f2a48..b4ff112e5c7e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -43,10 +43,10 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_memory_region_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
@@ -59,12 +59,14 @@ TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
 
 TEST_GEN_PROGS_s390x = s390x/memop
@@ -73,6 +75,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += set_memory_region_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 92e184a422ee..919e161dd289 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -10,6 +10,7 @@
 #include "test_util.h"
 
 #include "asm/kvm.h"
+#include "linux/list.h"
 #include "linux/kvm.h"
 #include <sys/ioctl.h>
 
@@ -113,6 +114,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 			  uint32_t data_memslot, uint32_t pgd_memslot);
@@ -256,6 +258,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 unsigned int vm_get_page_size(struct kvm_vm *vm);
 unsigned int vm_get_page_shift(struct kvm_vm *vm);
 unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
 
 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
 unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
@@ -311,13 +314,30 @@ void ucall_uninit(struct kvm_vm *vm);
 void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
+				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()		ucall(UCALL_DONE, 0)
-#define GUEST_ASSERT(_condition) do {			\
-	if (!(_condition))				\
-		ucall(UCALL_ABORT, 2,			\
-			"Failed guest assert: "		\
-			#_condition, __LINE__);		\
+#define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
+	if (!(_condition))					\
+		ucall(UCALL_ABORT, 2 + _nargs,			\
+			"Failed guest assert: "			\
+			#_condition, __LINE__, _args);		\
 } while (0)
 
+#define GUEST_ASSERT(_condition) \
+	__GUEST_ASSERT((_condition), 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+	__GUEST_ASSERT((_condition), 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+	__GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+	__GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+	__GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 7428513a4c68..82b7fe16a824 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
 static inline uint64_t rdtsc(void)
 {
 	uint32_t eax, edx;
-
+	uint64_t tsc_val;
 	/*
 	 * The lfence is to wait (on Intel CPUs) until all previous
-	 * instructions have been executed.
+	 * instructions have been executed. If software requires RDTSC to be
+	 * executed prior to execution of any subsequent instruction, it can
+	 * execute LFENCE immediately after RDTSC
 	 */
-	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
-	return ((uint64_t)edx) << 32 | eax;
+	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+	tsc_val = ((uint64_t)edx) << 32 | eax;
+	return tsc_val;
 }
 
 static inline uint64_t rdtscp(uint32_t *aux)
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index cd037917fece..674151d24fcf 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -35,4 +35,14 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
 void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
 void nested_svm_check_supported(void);
 
+static inline bool cpu_has_svm(void)
+{
+	u32 eax = 0x80000001, ecx;
+
+	asm("cpuid" :
+	    "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
+
+	return ecx & CPUID_SVM;
+}
+
 #endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3d27069b9ed9..ccff3e6e2704 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -575,6 +575,33 @@ struct vmx_pages {
 	void *eptp;
 };
 
+union vmx_basic {
+	u64 val;
+	struct {
+		u32 revision;
+		u32	size:13,
+			reserved1:3,
+			width:1,
+			dual:1,
+			type:4,
+			insouts:1,
+			ctrl:1,
+			vm_entry_exception_ctrl:1,
+			reserved2:7;
+	};
+};
+
+union vmx_ctrl_msr {
+	u64 val;
+	struct {
+		u32 set, clr;
+	};
+};
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
 struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
 bool prepare_for_vmx_operation(struct vmx_pages *vmx);
 void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9622431069bc..c9cede5c7d0d 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -161,6 +161,9 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	vm = calloc(1, sizeof(*vm));
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
+	INIT_LIST_HEAD(&vm->vcpus);
+	INIT_LIST_HEAD(&vm->userspace_mem_regions);
+
 	vm->mode = mode;
 	vm->type = 0;
 
@@ -258,8 +261,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 	if (vmp->has_irqchip)
 		vm_create_irqchip(vmp);
 
-	for (region = vmp->userspace_mem_region_head; region;
-		region = region->next) {
+	list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
 			    "  rc: %i errno: %i\n"
@@ -319,8 +321,7 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 {
 	struct userspace_mem_region *region;
 
-	for (region = vm->userspace_mem_region_head; region;
-		region = region->next) {
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 		uint64_t existing_start = region->region.guest_phys_addr;
 		uint64_t existing_end = region->region.guest_phys_addr
 			+ region->region.memory_size - 1;
@@ -378,11 +379,11 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
  */
 struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
 {
-	struct vcpu *vcpup;
+	struct vcpu *vcpu;
 
-	for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
-		if (vcpup->id == vcpuid)
-			return vcpup;
+	list_for_each_entry(vcpu, &vm->vcpus, list) {
+		if (vcpu->id == vcpuid)
+			return vcpu;
 	}
 
 	return NULL;
@@ -392,18 +393,16 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
  * VM VCPU Remove
  *
  * Input Args:
- *   vm - Virtual Machine
- *   vcpuid - VCPU ID
+ *   vcpu - VCPU to remove
  *
  * Output Args: None
  *
  * Return: None, TEST_ASSERT failures for all error conditions
  *
- * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ * Removes a vCPU from a VM and frees its resources.
  */
-static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+static void vm_vcpu_rm(struct vcpu *vcpu)
 {
-	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
 	int ret;
 
 	ret = munmap(vcpu->state, sizeof(*vcpu->state));
@@ -413,21 +412,17 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
 	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
 		"errno: %i", ret, errno);
 
-	if (vcpu->next)
-		vcpu->next->prev = vcpu->prev;
-	if (vcpu->prev)
-		vcpu->prev->next = vcpu->next;
-	else
-		vm->vcpu_head = vcpu->next;
+	list_del(&vcpu->list);
 	free(vcpu);
 }
 
 void kvm_vm_release(struct kvm_vm *vmp)
 {
+	struct vcpu *vcpu, *tmp;
 	int ret;
 
-	while (vmp->vcpu_head)
-		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+	list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
+		vm_vcpu_rm(vcpu);
 
 	ret = close(vmp->fd);
 	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -438,35 +433,38 @@ void kvm_vm_release(struct kvm_vm *vmp)
 		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
 }
 
+static void __vm_mem_region_delete(struct kvm_vm *vm,
+				   struct userspace_mem_region *region)
+{
+	int ret;
+
+	list_del(&region->list);
+
+	region->region.memory_size = 0;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+		    "rc: %i errno: %i", ret, errno);
+
+	sparsebit_free(&region->unused_phy_pages);
+	ret = munmap(region->mmap_start, region->mmap_size);
+	TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+
+	free(region);
+}
+
 /*
  * Destroys and frees the VM pointed to by vmp.
  */
 void kvm_vm_free(struct kvm_vm *vmp)
 {
-	int ret;
+	struct userspace_mem_region *region, *tmp;
 
 	if (vmp == NULL)
 		return;
 
 	/* Free userspace_mem_regions. */
-	while (vmp->userspace_mem_region_head) {
-		struct userspace_mem_region *region
-			= vmp->userspace_mem_region_head;
-
-		region->region.memory_size = 0;
-		ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
-			&region->region);
-		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
-			"rc: %i errno: %i", ret, errno);
-
-		vmp->userspace_mem_region_head = region->next;
-		sparsebit_free(&region->unused_phy_pages);
-		ret = munmap(region->mmap_start, region->mmap_size);
-		TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
-			    ret, errno);
-
-		free(region);
-	}
+	list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
+		__vm_mem_region_delete(vmp, region);
 
 	/* Free sparsebit arrays. */
 	sparsebit_free(&vmp->vpages_valid);
@@ -612,12 +610,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 			(uint64_t) region->region.memory_size);
 
 	/* Confirm no region with the requested slot already exists. */
-	for (region = vm->userspace_mem_region_head; region;
-		region = region->next) {
-		if (region->region.slot == slot)
-			break;
-	}
-	if (region != NULL)
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+		if (region->region.slot != slot)
+			continue;
+
 		TEST_FAIL("A mem region with the requested slot "
 			"already exists.\n"
 			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
@@ -626,6 +622,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 			region->region.slot,
 			(uint64_t) region->region.guest_phys_addr,
 			(uint64_t) region->region.memory_size);
+	}
 
 	/* Allocate and initialize new mem region structure. */
 	region = calloc(1, sizeof(*region));
@@ -686,10 +683,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		guest_paddr, (uint64_t) region->region.memory_size);
 
 	/* Add to linked-list of memory regions. */
-	if (vm->userspace_mem_region_head)
-		vm->userspace_mem_region_head->prev = region;
-	region->next = vm->userspace_mem_region_head;
-	vm->userspace_mem_region_head = region;
+	list_add(&region->list, &vm->userspace_mem_regions);
 }
 
 /*
@@ -712,20 +706,17 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
 
-	for (region = vm->userspace_mem_region_head; region;
-		region = region->next) {
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 		if (region->region.slot == memslot)
-			break;
-	}
-	if (region == NULL) {
-		fprintf(stderr, "No mem region with the requested slot found,\n"
-			"  requested slot: %u\n", memslot);
-		fputs("---- vm dump ----\n", stderr);
-		vm_dump(stderr, vm, 2);
-		TEST_FAIL("Mem region not found");
+			return region;
 	}
 
-	return region;
+	fprintf(stderr, "No mem region with the requested slot found,\n"
+		"  requested slot: %u\n", memslot);
+	fputs("---- vm dump ----\n", stderr);
+	vm_dump(stderr, vm, 2);
+	TEST_FAIL("Mem region not found");
+	return NULL;
 }
 
 /*
@@ -789,6 +780,24 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
 }
 
 /*
+ * VM Memory Region Delete
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - Slot of the memory region to delete
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Delete a memory region.
+ */
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+{
+	__vm_mem_region_delete(vm, memslot2region(vm, slot));
+}
+
+/*
  * VCPU mmap Size
  *
  * Input Args: None
@@ -863,10 +872,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
 		"vcpu id: %u errno: %i", vcpuid, errno);
 
 	/* Add to linked-list of VCPUs. */
-	if (vm->vcpu_head)
-		vm->vcpu_head->prev = vcpu;
-	vcpu->next = vm->vcpu_head;
-	vm->vcpu_head = vcpu;
+	list_add(&vcpu->list, &vm->vcpus);
 }
 
 /*
@@ -1059,8 +1065,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 {
 	struct userspace_mem_region *region;
-	for (region = vm->userspace_mem_region_head; region;
-	     region = region->next) {
+
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 		if ((gpa >= region->region.guest_phys_addr)
 			&& (gpa <= (region->region.guest_phys_addr
 				+ region->region.memory_size - 1)))
@@ -1092,8 +1098,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 {
 	struct userspace_mem_region *region;
-	for (region = vm->userspace_mem_region_head; region;
-	     region = region->next) {
+
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 		if ((hva >= region->host_mem)
 			&& (hva <= (region->host_mem
 				+ region->region.memory_size - 1)))
@@ -1529,8 +1535,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
 	fprintf(stream, "%*sMem Regions:\n", indent, "");
-	for (region = vm->userspace_mem_region_head; region;
-		region = region->next) {
+	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
 			"host_virt: %p\n", indent + 2, "",
 			(uint64_t) region->region.guest_phys_addr,
@@ -1549,7 +1554,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		virt_dump(stream, vm, indent + 4);
 	}
 	fprintf(stream, "%*sVCPUs:\n", indent, "");
-	for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+	list_for_each_entry(vcpu, &vm->vcpus, list)
 		vcpu_dump(stream, vm, vcpu->id, indent + 2);
 }
 
@@ -1743,6 +1748,11 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm)
 	return vm->max_gfn;
 }
 
+int vm_get_fd(struct kvm_vm *vm)
+{
+	return vm->fd;
+}
+
 static unsigned int vm_calc_num_pages(unsigned int num_pages,
 				      unsigned int page_shift,
 				      unsigned int new_page_shift,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index ca56a0133127..2ef446520748 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -13,7 +13,6 @@
 #define KVM_DEV_PATH		"/dev/kvm"
 
 struct userspace_mem_region {
-	struct userspace_mem_region *next, *prev;
 	struct kvm_userspace_memory_region region;
 	struct sparsebit *unused_phy_pages;
 	int fd;
@@ -21,10 +20,11 @@ struct userspace_mem_region {
 	void *host_mem;
 	void *mmap_start;
 	size_t mmap_size;
+	struct list_head list;
 };
 
 struct vcpu {
-	struct vcpu *next, *prev;
+	struct list_head list;
 	uint32_t id;
 	int fd;
 	struct kvm_run *state;
@@ -41,8 +41,8 @@ struct kvm_vm {
 	unsigned int pa_bits;
 	unsigned int va_bits;
 	uint64_t max_gfn;
-	struct vcpu *vcpu_head;
-	struct userspace_mem_region *userspace_mem_region_head;
+	struct list_head vcpus;
+	struct list_head userspace_mem_regions;
 	struct sparsebit *vpages_valid;
 	struct sparsebit *vpages_mapped;
 	bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 8d94961bd046..a88c5d665725 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -233,7 +233,10 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 
 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
 {
-	struct vcpu *vcpu = vm->vcpu_head;
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	if (!vcpu)
+		return;
 
 	fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
 		indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
new file mode 100644
index 000000000000..b3ece55a2da6
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
+ * 2MB sized and aligned region so that the initial region corresponds to
+ * exactly one large page.
+ */
+#define MEM_REGION_SIZE		0x200000
+
+#ifdef __x86_64__
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.
+ */
+#define MEM_REGION_GPA		0xc0000000
+#define MEM_REGION_SLOT		10
+
+static const uint64_t MMIO_VAL = 0xbeefull;
+
+extern const uint64_t final_rip_start;
+extern const uint64_t final_rip_end;
+
+static sem_t vcpu_ready;
+
+static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+{
+	uint64_t val;
+
+	do {
+		val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+	} while (val == spin_val);
+
+	GUEST_SYNC(0);
+	return val;
+}
+
+static void *vcpu_worker(void *data)
+{
+	struct kvm_vm *vm = data;
+	struct kvm_run *run;
+	struct ucall uc;
+	uint64_t cmd;
+
+	/*
+	 * Loop until the guest is done.  Re-enter the guest on all MMIO exits,
+	 * which will occur if the guest attempts to access a memslot after it
+	 * has been deleted or while it is being moved .
+	 */
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (1) {
+		vcpu_run(vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			cmd = get_ucall(vm, VCPU_ID, &uc);
+			if (cmd != UCALL_SYNC)
+				break;
+
+			sem_post(&vcpu_ready);
+			continue;
+		}
+
+		if (run->exit_reason != KVM_EXIT_MMIO)
+			break;
+
+		TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
+		TEST_ASSERT(run->mmio.len == 8,
+			    "Unexpected exit mmio size = %u", run->mmio.len);
+
+		TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
+			    "Unexpected exit mmio address = 0x%llx",
+			    run->mmio.phys_addr);
+		memcpy(run->mmio.data, &MMIO_VAL, 8);
+	}
+
+	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+			  __FILE__, uc.args[1], uc.args[2]);
+
+	return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+	struct timespec ts;
+
+	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+		    "clock_gettime() failed: %d\n", errno);
+
+	ts.tv_sec += 2;
+	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+		    "sem_timedwait() failed: %d\n", errno);
+
+	/* Wait for the vCPU thread to reenter the guest. */
+	usleep(100000);
+}
+
+static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+{
+	struct kvm_vm *vm;
+	uint64_t *hva;
+	uint64_t gpa;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / getpagesize(), 0);
+
+	/*
+	 * Allocate and map two pages so that the GPA accessed by guest_code()
+	 * stays valid across the memslot move.
+	 */
+	gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+	/* Ditto for the host mapping so that both pages can be zeroed. */
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+	memset(hva, 0, 2 * 4096);
+
+	pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+
+	/* Ensure the guest thread is spun up. */
+	wait_for_vcpu();
+
+	return vm;
+}
+
+
+static void guest_code_move_memory_region(void)
+{
+	uint64_t val;
+
+	GUEST_SYNC(0);
+
+	/*
+	 * Spin until the memory region is moved to a misaligned address.  This
+	 * may or may not trigger MMIO, as the window where the memslot is
+	 * invalid is quite small.
+	 */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+	/* Spin until the memory region is realigned. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 1, val);
+
+	GUEST_DONE();
+}
+
+static void test_move_memory_region(void)
+{
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	uint64_t *hva;
+
+	vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+
+	/*
+	 * Shift the region's base GPA.  The guest should not see "2" as the
+	 * hva->gpa translation is misaligned, i.e. the guest is accessing a
+	 * different host pfn.
+	 */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+	WRITE_ONCE(*hva, 2);
+
+	/*
+	 * The guest _might_ see an invalid memslot and trigger MMIO, but it's
+	 * a tiny window.  Spin and defer the sync until the memslot is
+	 * restored and guest behavior is once again deterministic.
+	 */
+	usleep(100000);
+
+	/*
+	 * Note, value in memory needs to be changed *before* restoring the
+	 * memslot, else the guest could race the update and see "2".
+	 */
+	WRITE_ONCE(*hva, 1);
+
+	/* Restore the original base, the guest should see "1". */
+	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+	wait_for_vcpu();
+	/* Defered sync from when the memslot was misaligned (above). */
+	wait_for_vcpu();
+
+	pthread_join(vcpu_thread, NULL);
+
+	kvm_vm_free(vm);
+}
+
+static void guest_code_delete_memory_region(void)
+{
+	uint64_t val;
+
+	GUEST_SYNC(0);
+
+	/* Spin until the memory region is deleted. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+	/* Spin until the memory region is recreated. */
+	val = guest_spin_on_val(MMIO_VAL);
+	GUEST_ASSERT_1(val == 0, val);
+
+	/* Spin until the memory region is deleted. */
+	val = guest_spin_on_val(0);
+	GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+	asm("1:\n\t"
+	    ".pushsection .rodata\n\t"
+	    ".global final_rip_start\n\t"
+	    "final_rip_start: .quad 1b\n\t"
+	    ".popsection");
+
+	/* Spin indefinitely (until the code memslot is deleted). */
+	guest_spin_on_val(MMIO_VAL);
+
+	asm("1:\n\t"
+	    ".pushsection .rodata\n\t"
+	    ".global final_rip_end\n\t"
+	    "final_rip_end: .quad 1b\n\t"
+	    ".popsection");
+
+	GUEST_ASSERT_1(0, 0);
+}
+
+static void test_delete_memory_region(void)
+{
+	pthread_t vcpu_thread;
+	struct kvm_regs regs;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+
+	vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+
+	/* Delete the memory region, the guest should not die. */
+	vm_mem_region_delete(vm, MEM_REGION_SLOT);
+	wait_for_vcpu();
+
+	/* Recreate the memory region.  The guest should see "0". */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / getpagesize(), 0);
+	wait_for_vcpu();
+
+	/* Delete the region again so that there's only one memslot left. */
+	vm_mem_region_delete(vm, MEM_REGION_SLOT);
+	wait_for_vcpu();
+
+	/*
+	 * Delete the primary memslot.  This should cause an emulation error or
+	 * shutdown due to the page tables getting nuked.
+	 */
+	vm_mem_region_delete(vm, 0);
+
+	pthread_join(vcpu_thread, NULL);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
+		    run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason = %d", run->exit_reason);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+
+	/*
+	 * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
+	 * so the instruction pointer would point to the reset vector.
+	 */
+	if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
+		TEST_ASSERT(regs.rip >= final_rip_start &&
+			    regs.rip < final_rip_end,
+			    "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+			    final_rip_start, final_rip_end, regs.rip);
+
+	kvm_vm_free(vm);
+}
+
+static void test_zero_memory_regions(void)
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+
+	pr_info("Testing KVM_RUN with zero added memory regions\n");
+
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+	vm_vcpu_add(vm, VCPU_ID);
+
+	TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
+		    "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
+	vcpu_run(vm, VCPU_ID);
+
+	run = vcpu_state(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit_reason = %u\n", run->exit_reason);
+
+	kvm_vm_free(vm);
+}
+#endif /* __x86_64__ */
+
+/*
+ * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any
+ * tentative to add further slots should fail.
+ */
+static void test_add_max_memory_regions(void)
+{
+	int ret;
+	struct kvm_vm *vm;
+	uint32_t max_mem_slots;
+	uint32_t slot;
+	uint64_t guest_addr = 0x0;
+	uint64_t mem_reg_npages;
+	void *mem;
+
+	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	TEST_ASSERT(max_mem_slots > 0,
+		    "KVM_CAP_NR_MEMSLOTS should be greater than 0");
+	pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
+
+	vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+
+	mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+
+	/* Check it can be added memory slots up to the maximum allowed */
+	pr_info("Adding slots 0..%i, each memory region with %dK size\n",
+		(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+	for (slot = 0; slot < max_mem_slots; slot++) {
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+					    guest_addr, slot, mem_reg_npages,
+					    0);
+		guest_addr += MEM_REGION_SIZE;
+	}
+
+	/* Check it cannot be added memory slots beyond the limit */
+	mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+	ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
+		    &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
+		    MEM_REGION_SIZE, (uint64_t) mem});
+	TEST_ASSERT(ret == -1 && errno == EINVAL,
+		    "Adding one more memory slot should fail with EINVAL");
+
+	munmap(mem, MEM_REGION_SIZE);
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef __x86_64__
+	int i, loops;
+#endif
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+#ifdef __x86_64__
+	/*
+	 * FIXME: the zero-memslot test fails on aarch64 and s390x because
+	 * KVM_RUN fails with ENOEXEC or EFAULT.
+	 */
+	test_zero_memory_regions();
+#endif
+
+	test_add_max_memory_regions();
+
+#ifdef __x86_64__
+	if (argc > 1)
+		loops = atoi(argv[1]);
+	else
+		loops = 10;
+
+	pr_info("Testing MOVE of in-use region, %d loops\n", loops);
+	for (i = 0; i < loops; i++)
+		test_move_memory_region();
+
+	pr_info("Testing DELETE of in-use region, %d loops\n", loops);
+	for (i = 0; i < loops; i++)
+		test_delete_memory_region();
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 83323f3d7ca0..4a7967cca281 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -26,18 +26,18 @@ static void guest_code(void)
 {
 }
 
-static int smt_possible(void)
+static bool smt_possible(void)
 {
 	char buf[16];
 	FILE *f;
-	bool res = 1;
+	bool res = true;
 
 	f = fopen("/sys/devices/system/cpu/smt/control", "r");
 	if (f) {
 		if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
 			if (!strncmp(buf, "forceoff", 8) ||
 			    !strncmp(buf, "notsupported", 12))
-				res = 0;
+				res = false;
 		}
 		fclose(f);
 	}
@@ -46,29 +46,31 @@ static int smt_possible(void)
 }
 
 static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
-			  int evmcs_enabled)
+			  bool evmcs_enabled)
 {
 	int i;
+	int nent = 9;
+	u32 test_val;
 
-	if (!evmcs_enabled)
-		TEST_ASSERT(hv_cpuid_entries->nent == 6,
-			    "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries"
-			    " when Enlightened VMCS is disabled (returned %d)",
-			    hv_cpuid_entries->nent);
-	else
-		TEST_ASSERT(hv_cpuid_entries->nent == 7,
-			    "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries"
-			    " when Enlightened VMCS is enabled (returned %d)",
-			    hv_cpuid_entries->nent);
+	if (evmcs_enabled)
+		nent += 1; /* 0x4000000A */
+
+	TEST_ASSERT(hv_cpuid_entries->nent == nent,
+		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+		    " with evmcs=%d (returned %d)",
+		    nent, evmcs_enabled, hv_cpuid_entries->nent);
 
 	for (i = 0; i < hv_cpuid_entries->nent; i++) {
 		struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
 
 		TEST_ASSERT((entry->function >= 0x40000000) &&
-			    (entry->function <= 0x4000000A),
+			    (entry->function <= 0x40000082),
 			    "function %x is our of supported range",
 			    entry->function);
 
+		TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+			    "0x4000000A leaf should not be reported");
+
 		TEST_ASSERT(entry->index == 0,
 			    ".index field should be zero");
 
@@ -78,12 +80,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
 		TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
 			    !entry->padding[2], "padding should be zero");
 
-		if (entry->function == 0x40000004) {
-			int nononarchcs = !!(entry->eax & (1UL << 18));
+		switch (entry->function) {
+		case 0x40000000:
+			test_val = 0x40000082;
 
-			TEST_ASSERT(nononarchcs == !smt_possible(),
+			TEST_ASSERT(entry->eax == test_val,
+				    "Wrong max leaf report in 0x40000000.EAX: %x"
+				    " (evmcs=%d)",
+				    entry->eax, evmcs_enabled
+				);
+			break;
+		case 0x40000004:
+			test_val = entry->eax & (1UL << 18);
+
+			TEST_ASSERT(!!test_val == !smt_possible(),
 				    "NoNonArchitecturalCoreSharing bit"
 				    " doesn't reflect SMT setting");
+			break;
 		}
 
 		/*
@@ -133,8 +146,9 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
-	int rv;
+	int rv, stage;
 	struct kvm_cpuid2 *hv_cpuid_entries;
+	bool evmcs_enabled;
 
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
@@ -145,36 +159,31 @@ int main(int argc, char *argv[])
 		exit(KSFT_SKIP);
 	}
 
-	/* Create VM */
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-
-	test_hv_cpuid_e2big(vm);
-
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-	if (!hv_cpuid_entries)
-		return 1;
-
-	test_hv_cpuid(hv_cpuid_entries, 0);
-
-	free(hv_cpuid_entries);
+	for (stage = 0; stage < 3; stage++) {
+		evmcs_enabled = false;
+
+		vm = vm_create_default(VCPU_ID, 0, guest_code);
+		switch (stage) {
+		case 0:
+			test_hv_cpuid_e2big(vm);
+			continue;
+		case 1:
+			break;
+		case 2:
+			if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+				print_skip("Enlightened VMCS is unsupported");
+				continue;
+			}
+			vcpu_enable_evmcs(vm, VCPU_ID);
+			evmcs_enabled = true;
+			break;
+		}
 
-	if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
-		print_skip("Enlightened VMCS is unsupported");
-		goto vm_free;
+		hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
+		test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
+		free(hv_cpuid_entries);
+		kvm_vm_free(vm);
 	}
 
-	vcpu_enable_evmcs(vm, VCPU_ID);
-
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
-	if (!hv_cpuid_entries)
-		return 1;
-
-	test_hv_cpuid(hv_cpuid_entries, 1);
-
-	free(hv_cpuid_entries);
-
-vm_free:
-	kvm_vm_free(vm);
-
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c b/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
deleted file mode 100644
index c6691cff4e19..000000000000
--- a/tools/testing/selftests/kvm/x86_64/set_memory_region_test.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <pthread.h>
-#include <sched.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include <linux/compiler.h>
-
-#include <test_util.h>
-#include <kvm_util.h>
-#include <processor.h>
-
-#define VCPU_ID 0
-
-/*
- * Somewhat arbitrary location and slot, intended to not overlap anything.  The
- * location and size are specifically 2mb sized/aligned so that the initial
- * region corresponds to exactly one large page.
- */
-#define MEM_REGION_GPA		0xc0000000
-#define MEM_REGION_SIZE		0x200000
-#define MEM_REGION_SLOT		10
-
-static void guest_code(void)
-{
-	uint64_t val;
-
-	do {
-		val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
-	} while (!val);
-
-	if (val != 1)
-		ucall(UCALL_ABORT, 1, val);
-
-	GUEST_DONE();
-}
-
-static void *vcpu_worker(void *data)
-{
-	struct kvm_vm *vm = data;
-	struct kvm_run *run;
-	struct ucall uc;
-	uint64_t cmd;
-
-	/*
-	 * Loop until the guest is done.  Re-enter the guest on all MMIO exits,
-	 * which will occur if the guest attempts to access a memslot while it
-	 * is being moved.
-	 */
-	run = vcpu_state(vm, VCPU_ID);
-	do {
-		vcpu_run(vm, VCPU_ID);
-	} while (run->exit_reason == KVM_EXIT_MMIO);
-
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason = %d", run->exit_reason);
-
-	cmd = get_ucall(vm, VCPU_ID, &uc);
-	TEST_ASSERT(cmd == UCALL_DONE, "Unexpected val in guest = %lu", uc.args[0]);
-	return NULL;
-}
-
-static void test_move_memory_region(void)
-{
-	pthread_t vcpu_thread;
-	struct kvm_vm *vm;
-	uint64_t *hva;
-	uint64_t gpa;
-
-	vm = vm_create_default(VCPU_ID, 0, guest_code);
-
-	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
-	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
-				    MEM_REGION_GPA, MEM_REGION_SLOT,
-				    MEM_REGION_SIZE / getpagesize(), 0);
-
-	/*
-	 * Allocate and map two pages so that the GPA accessed by guest_code()
-	 * stays valid across the memslot move.
-	 */
-	gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
-	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
-
-	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
-
-	/* Ditto for the host mapping so that both pages can be zeroed. */
-	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
-	memset(hva, 0, 2 * 4096);
-
-	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
-
-	/* Ensure the guest thread is spun up. */
-	usleep(100000);
-
-	/*
-	 * Shift the region's base GPA.  The guest should not see "2" as the
-	 * hva->gpa translation is misaligned, i.e. the guest is accessing a
-	 * different host pfn.
-	 */
-	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
-	WRITE_ONCE(*hva, 2);
-
-	usleep(100000);
-
-	/*
-	 * Note, value in memory needs to be changed *before* restoring the
-	 * memslot, else the guest could race the update and see "2".
-	 */
-	WRITE_ONCE(*hva, 1);
-
-	/* Restore the original base, the guest should see "1". */
-	vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
-
-	pthread_join(vcpu_thread, NULL);
-
-	kvm_vm_free(vm);
-}
-
-int main(int argc, char *argv[])
-{
-	int i, loops;
-
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
-	if (argc > 1)
-		loops = atoi(argv[1]);
-	else
-		loops = 10;
-
-	for (i = 0; i < loops; i++)
-		test_move_memory_region();
-
-	return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8230b6bc6b8f..6f8f478b3ceb 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -17,6 +17,7 @@
 #include "kvm_util.h"
 
 #include "vmx.h"
+#include "svm_util.h"
 
 #define VCPU_ID	      1
 
@@ -58,7 +59,7 @@ void self_smi(void)
 	      APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+void guest_code(void *arg)
 {
 	uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
 
@@ -72,8 +73,11 @@ void guest_code(struct vmx_pages *vmx_pages)
 
 	sync_with_host(4);
 
-	if (vmx_pages) {
-		GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	if (arg) {
+		if (cpu_has_svm())
+			generic_svm_setup(arg, NULL, NULL);
+		else
+			GUEST_ASSERT(prepare_for_vmx_operation(arg));
 
 		sync_with_host(5);
 
@@ -87,7 +91,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	vm_vaddr_t nested_gva = 0;
 
 	struct kvm_regs regs;
 	struct kvm_vm *vm;
@@ -114,8 +118,11 @@ int main(int argc, char *argv[])
 	vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
 
 	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		vcpu_alloc_vmx(vm, &vmx_pages_gva);
-		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+		if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+			vcpu_alloc_svm(vm, &nested_gva);
+		else
+			vcpu_alloc_vmx(vm, &nested_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
 	} else {
 		pr_info("will skip SMM test with VMX enabled\n");
 		vcpu_args_set(vm, VCPU_ID, 1, 0);
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 5b1a016edf55..d43b6f99b66c 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -18,14 +18,46 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "vmx.h"
+#include "svm_util.h"
 
 #define VCPU_ID		5
+#define L2_GUEST_STACK_SIZE 256
 
-void l2_guest_code(void)
+void svm_l2_guest_code(void)
 {
+	GUEST_SYNC(4);
+	/* Exit to L1 */
+	vmcall();
 	GUEST_SYNC(6);
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
 
-        /* Exit to L1 */
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	GUEST_ASSERT(svm->vmcb_gpa);
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, svm_l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(3);
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(5);
+	vmcb->save.rip += 3;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+	GUEST_SYNC(6);
+
+	/* Exit to L1 */
 	vmcall();
 
 	/* L1 has now set up a shadow VMCS for us.  */
@@ -42,10 +74,9 @@ void l2_guest_code(void)
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
 {
-#define L2_GUEST_STACK_SIZE 64
-        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -56,7 +87,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_SYNC(4);
 	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
 
-	prepare_vmcs(vmx_pages, l2_guest_code,
+	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
 		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
 	GUEST_SYNC(5);
@@ -106,20 +137,24 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	GUEST_ASSERT(vmresume());
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+static void __attribute__((__flatten__)) guest_code(void *arg)
 {
 	GUEST_SYNC(1);
 	GUEST_SYNC(2);
 
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
+	if (arg) {
+		if (cpu_has_svm())
+			svm_l1_guest_code(arg);
+		else
+			vmx_l1_guest_code(arg);
+	}
 
 	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	vm_vaddr_t nested_gva = 0;
 
 	struct kvm_regs regs1, regs2;
 	struct kvm_vm *vm;
@@ -136,8 +171,11 @@ int main(int argc, char *argv[])
 	vcpu_regs_get(vm, VCPU_ID, &regs1);
 
 	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
-		vcpu_alloc_vmx(vm, &vmx_pages_gva);
-		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+		if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+			vcpu_alloc_svm(vm, &nested_gva);
+		else
+			vcpu_alloc_vmx(vm, &nested_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
 	} else {
 		pr_info("will skip nested state checks\n");
 		vcpu_args_set(vm, VCPU_ID, 1, 0);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000000..cc72b6188ca7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restarts the timer with the full timer
+ * value instead of partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID		5
+#define PREEMPTION_TIMER_VALUE			100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1	 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+void l2_guest_code(void)
+{
+	u64 vmx_pt_delta;
+
+	vmcall();
+	l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	/*
+	 * Wait until the 1st threshold has passed
+	 */
+	do {
+		l2_vmx_pt_finish = rdtsc();
+		vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+				vmx_pt_rate;
+	} while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+	/*
+	 * Force L2 through Save and Restore cycle
+	 */
+	GUEST_SYNC(1);
+
+	l2_save_restore_done = 1;
+
+	/*
+	 * Now wait for the preemption timer to fire and
+	 * exit to L1
+	 */
+	while ((l2_vmx_pt_finish = rdtsc()))
+		;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	u64 l1_vmx_pt_start;
+	u64 l1_vmx_pt_finish;
+	u64 l1_tsc_deadline, l2_tsc_deadline;
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/*
+	 * Check for Preemption timer support
+	 */
+	basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+	ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+			: MSR_IA32_VMX_PINBASED_CTLS);
+	ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+			: MSR_IA32_VMX_EXIT_CTLS);
+
+	if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+	    !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+		return;
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+	/*
+	 * Turn on PIN control and resume the guest
+	 */
+	GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+			      vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+			      PIN_BASED_VMX_PREEMPTION_TIMER));
+
+	GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+			      PREEMPTION_TIMER_VALUE));
+
+	vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+	l2_save_restore_done = 0;
+
+	l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+	GUEST_ASSERT(!vmresume());
+
+	l1_vmx_pt_finish = rdtsc();
+
+	/*
+	 * Ensure exit from L2 happens after L2 goes through
+	 * save and restore
+	 */
+	GUEST_ASSERT(l2_save_restore_done);
+
+	/*
+	 * Ensure the exit from L2 is due to preemption timer expiry
+	 */
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+	l1_tsc_deadline = l1_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	l2_tsc_deadline = l2_vmx_pt_start +
+		(PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+	/*
+	 * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+	 * tsc deadlines so that host can verify they are as expected
+	 */
+	GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+		l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	struct ucall uc;
+	int stage;
+
+	/*
+	 * AMD currently does not implement any VMX features, so for now we
+	 * just early out.
+	 */
+	nested_vmx_check_supported();
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	} else {
+		pr_info("will skip vmx preemption timer checks\n");
+		goto done;
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Stage %d: unexpected exit reason: %u (%s),\n",
+			    stage, run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			/* NOT REACHED */
+		case UCALL_SYNC:
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		/* UCALL_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+			    stage, (ulong)uc.args[1]);
+		/*
+		 * If this stage 2 then we should verify the vmx pt expiry
+		 * is as expected.
+		 * From L1's perspective verify Preemption timer hasn't
+		 * expired too early.
+		 * From L2's perspective verify Preemption timer hasn't
+		 * expired too late.
+		 */
+		if (stage == 2) {
+
+			pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+				stage, uc.args[2], uc.args[3]);
+
+			pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+				stage, uc.args[4], uc.args[5]);
+
+			TEST_ASSERT(uc.args[2] >= uc.args[3],
+				"Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+				stage, uc.args[2], uc.args[3]);
+
+			TEST_ASSERT(uc.args[4] < uc.args[5],
+				"Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+				stage, uc.args[4], uc.args[5]);
+		}
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 14a77ea4a8da..b80ee3f6e265 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -2,3 +2,4 @@ CONFIG_TEST_PRINTF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_PRIME_NUMBERS=m
 CONFIG_TEST_STRSCPY=m
+CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3f386eb9e7d7..895ec992b2f1 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
 TEST_PROGS += route_localnet.sh
 TEST_PROGS += reuseaddr_ports_exhausted.sh
 TEST_PROGS += txtimestamp.sh
+TEST_PROGS += vrf-xfrm-tests.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 6560ed796ac4..dee567f7576a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
 
 ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
 TESTS="${ALL_TESTS}"
@@ -146,35 +146,36 @@ setup()
 	create_ns remote
 
 	IP="ip -netns me"
+	BRIDGE="bridge -netns me"
 	set -e
 	$IP li add veth1 type veth peer name veth2
 	$IP li set veth1 up
 	$IP addr add 172.16.1.1/24 dev veth1
-	$IP -6 addr add 2001:db8:91::1/64 dev veth1
+	$IP -6 addr add 2001:db8:91::1/64 dev veth1 nodad
 
 	$IP li add veth3 type veth peer name veth4
 	$IP li set veth3 up
 	$IP addr add 172.16.2.1/24 dev veth3
-	$IP -6 addr add 2001:db8:92::1/64 dev veth3
+	$IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
 
 	$IP li set veth2 netns peer up
 	ip -netns peer addr add 172.16.1.2/24 dev veth2
-	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
 
 	$IP li set veth4 netns peer up
 	ip -netns peer addr add 172.16.2.2/24 dev veth4
-	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
 
 	ip -netns remote li add veth5 type veth peer name veth6
 	ip -netns remote li set veth5 up
 	ip -netns remote addr add dev veth5 172.16.101.1/24
-	ip -netns remote addr add dev veth5 2001:db8:101::1/64
+	ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
 	ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
 	ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
 
 	ip -netns remote li set veth6 netns peer up
 	ip -netns peer addr add dev veth6 172.16.101.2/24
-	ip -netns peer addr add dev veth6 2001:db8:101::2/64
+	ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
 	set +e
 }
 
@@ -248,11 +249,247 @@ check_route6()
 	local expected="$2"
 	local out
 
-	out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+	out=$($IP -6 route ls match ${pfx} 2>/dev/null | sed -e 's/pref medium//')
 
 	check_output "${out}" "${expected}"
 }
 
+check_large_grp()
+{
+	local ipv=$1
+	local ecmp=$2
+	local grpnum=100
+	local nhidstart=100
+	local grpidstart=1000
+	local iter=0
+	local nhidstr=""
+	local grpidstr=""
+	local grpstr=""
+	local ipstr=""
+
+	if [ $ipv -eq 4 ]; then
+		ipstr="172.16.1."
+	else
+		ipstr="2001:db8:91::"
+	fi
+
+	#
+	# Create $grpnum groups with specified $ecmp and dump them
+	#
+
+	# create nexthops with different gateways
+	iter=2
+	while [ $iter -le $(($ecmp + 1)) ]
+	do
+		nhidstr="$(($nhidstart + $iter))"
+		run_cmd "$IP nexthop add id $nhidstr via $ipstr$iter dev veth1"
+		check_nexthop "id $nhidstr" "id $nhidstr via $ipstr$iter dev veth1 scope link"
+
+		if [ $iter -le $ecmp ]; then
+			grpstr+="$nhidstr/"
+		else
+			grpstr+="$nhidstr"
+		fi
+		((iter++))
+	done
+
+	# create duplicate large ecmp groups
+	iter=0
+	while [ $iter -le $grpnum ]
+	do
+		grpidstr="$(($grpidstart + $iter))"
+		run_cmd "$IP nexthop add id $grpidstr group $grpstr"
+		check_nexthop "id $grpidstr" "id $grpidstr group $grpstr"
+		((iter++))
+	done
+
+	# dump large groups
+	run_cmd "$IP nexthop list"
+	log_test $? 0 "Dump large (x$ecmp) ecmp groups"
+}
+
+start_ip_monitor()
+{
+	local mtype=$1
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/nexthoptestXXX`
+	mpid=`($IP monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+	echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+	local mpid=$1
+	local tmpfile=$2
+	local el=$3
+
+	# check the monitor results
+	kill $mpid
+	lines=`wc -l $tmpfile | cut "-d " -f1`
+	test $lines -eq $el
+	rc=$?
+	rm -rf $tmpfile
+
+	return $rc
+}
+
+check_nexthop_fdb_support()
+{
+	$IP nexthop help 2>&1 | grep -q fdb
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 too old, missing fdb nexthop support"
+		return $ksft_skip
+	fi
+}
+
+ipv6_fdb_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 fdb groups functional"
+	echo "--------------------------"
+
+	check_nexthop_fdb_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb"
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb"
+	run_cmd "$IP nexthop add id 102 group 61/62 fdb"
+	check_nexthop "id 102" "id 102 group 61/62 fdb"
+	log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+	## get nexthop group
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" "id 102 group 61/62 fdb"
+	log_test $? 0 "Get Fdb nexthop group by id"
+
+	# fdb nexthop group can only contain fdb nexthops
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+	run_cmd "$IP nexthop add id 103 group 63/64 fdb"
+	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+	# Non fdb nexthop group can not contain fdb nexthops
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb"
+	run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb"
+	run_cmd "$IP nexthop add id 104 group 65/66"
+	log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+	# fdb nexthop cannot have blackhole
+	run_cmd "$IP nexthop add id 67 blackhole fdb"
+	log_test $? 2 "Fdb Nexthop with blackhole"
+
+	# fdb nexthop with oif
+	run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with oif"
+
+	# fdb nexthop with onlink
+	run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb"
+	log_test $? 2 "Fdb Nexthop with onlink"
+
+	# fdb nexthop with encap
+	run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with encap"
+
+	run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+	log_test $? 0 "Fdb mac add with nexthop group"
+
+	## fdb nexthops can only reference nexthop groups and not nexthops
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self"
+	log_test $? 255 "Fdb mac add with nexthop"
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66"
+	log_test $? 2 "Route add with fdb nexthop"
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
+	log_test $? 2 "Route add with fdb nexthop group"
+
+	run_cmd "$IP nexthop del id 102"
+	log_test $? 0 "Fdb nexthop delete"
+
+	$IP link del dev vx10
+}
+
+ipv4_fdb_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 fdb groups functional"
+	echo "--------------------------"
+
+	check_nexthop_fdb_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb"
+	run_cmd "$IP nexthop add id 102 group 12/13 fdb"
+	check_nexthop "id 102" "id 102 group 12/13 fdb"
+	log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" "id 102 group 12/13 fdb"
+	log_test $? 0 "Get Fdb nexthop group by id"
+
+	# fdb nexthop group can only contain fdb nexthops
+	run_cmd "$IP nexthop add id 14 via 172.16.1.2"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+	run_cmd "$IP nexthop add id 103 group 14/15 fdb"
+	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+	# Non fdb nexthop group can not contain fdb nexthops
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
+	run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
+	run_cmd "$IP nexthop add id 104 group 14/15"
+	log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+	# fdb nexthop cannot have blackhole
+	run_cmd "$IP nexthop add id 18 blackhole fdb"
+	log_test $? 2 "Fdb Nexthop with blackhole"
+
+	# fdb nexthop with oif
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with oif"
+
+	# fdb nexthop with onlink
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb"
+	log_test $? 2 "Fdb Nexthop with onlink"
+
+	# fdb nexthop with encap
+	run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with encap"
+
+	run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+	log_test $? 0 "Fdb mac add with nexthop group"
+
+	# fdb nexthops can only reference nexthop groups and not nexthops
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
+	log_test $? 255 "Fdb mac add with nexthop"
+
+	run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+	log_test $? 2 "Route add with fdb nexthop"
+
+	run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
+	log_test $? 2 "Route add with fdb nexthop group"
+
+	run_cmd "$IP nexthop del id 102"
+	log_test $? 0 "Fdb nexthop delete"
+
+	$IP link del dev vx10
+}
+
 ################################################################################
 # basic operations (add, delete, replace) on nexthops and nexthop groups
 #
@@ -423,8 +660,6 @@ ipv6_fcnal_runtime()
 	echo "IPv6 functional runtime"
 	echo "-----------------------"
 
-	sleep 5
-
 	#
 	# IPv6 - the basics
 	#
@@ -481,12 +716,12 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP -6 nexthop add id 85 dev veth1"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
 	log_test $? 0 "IPv6 route with device only nexthop"
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024"
 
 	run_cmd "$IP nexthop add id 123 group 81/85"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
 	log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1"
 
 	#
 	# IPv6 route with v4 nexthop - not allowed
@@ -519,6 +754,75 @@ ipv6_fcnal_runtime()
 	# route with src address and using nexthop - not allowed
 }
 
+ipv6_large_grp()
+{
+	local ecmp=32
+
+	echo
+	echo "IPv6 large groups (x$ecmp)"
+	echo "---------------------"
+
+	check_large_grp 6 $ecmp
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
+ipv6_del_add_loop1()
+{
+	while :; do
+		$IP nexthop del id 100
+		$IP nexthop add id 100 via 2001:db8:91::2 dev veth1
+	done >/dev/null 2>&1
+}
+
+ipv6_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101
+	done >/dev/null 2>&1
+}
+
+ipv6_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv6 runtime torture"
+	echo "--------------------"
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101"
+	run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+	run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+	ipv6_del_add_loop1 &
+	pid1=$!
+	ipv6_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+	# if we did not crash, success
+	log_test 0 0 "IPv6 torture test"
+}
+
+
 ipv4_fcnal()
 {
 	local rc
@@ -866,6 +1170,11 @@ ipv4_fcnal_runtime()
 		$IP neigh sh | grep 'dev veth1'
 	fi
 
+	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv4 default route with IPv6 gateway"
+
 	#
 	# MPLS as an example of LWT encap
 	#
@@ -880,6 +1189,241 @@ ipv4_fcnal_runtime()
 	log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
 }
 
+ipv4_large_grp()
+{
+	local ecmp=32
+
+	echo
+	echo "IPv4 large groups (x$ecmp)"
+	echo "---------------------"
+
+	check_large_grp 4 $ecmp
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
+sysctl_nexthop_compat_mode_check()
+{
+	local sysctlname="net.ipv4.nexthop_compat_mode"
+	local lprefix=$1
+
+	IPE="ip netns exec me"
+
+	$IPE sysctl -q $sysctlname 2>&1 >/dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: kernel lacks nexthop compat mode sysctl control"
+		return $ksft_skip
+	fi
+
+	out=$($IPE sysctl $sysctlname 2>/dev/null)
+	log_test $? 0 "$lprefix default nexthop compat mode check"
+	check_output "${out}" "$sysctlname = 1"
+}
+
+sysctl_nexthop_compat_mode_set()
+{
+	local sysctlname="net.ipv4.nexthop_compat_mode"
+	local mode=$1
+	local lprefix=$2
+
+	IPE="ip netns exec me"
+
+	out=$($IPE sysctl -w $sysctlname=$mode)
+	log_test $? 0 "$lprefix set compat mode - $mode"
+	check_output "${out}" "net.ipv4.nexthop_compat_mode = $mode"
+}
+
+ipv6_compat_mode()
+{
+	local rc
+
+	echo
+	echo "IPv6 nexthop api compat mode test"
+	echo "--------------------------------"
+
+	sysctl_nexthop_compat_mode_check "IPv6"
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 62/63"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+	# route add notification should contain expanded nexthops
+	stop_ip_monitor $ipmout 3
+	log_test $? 0 "IPv6 compat mode on - route add notification"
+
+	# route dump should contain expanded nexthops
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
+	log_test $? 0 "IPv6 compat mode on - route dump"
+
+	# change in nexthop group should generate route notification
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 62/64"
+	stop_ip_monitor $ipmout 3
+
+	log_test $? 0 "IPv6 compat mode on - nexthop change"
+
+	# set compat mode off
+	sysctl_nexthop_compat_mode_set 0 "IPv6"
+
+	run_cmd "$IP -6 ro del 2001:db8:101::1/128 nhid 122"
+
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 62/63"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+	# route add notification should not contain expanded nexthops
+	stop_ip_monitor $ipmout 1
+	log_test $? 0 "IPv6 compat mode off - route add notification"
+
+	# route dump should not contain expanded nexthops
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024"
+	log_test $? 0 "IPv6 compat mode off - route dump"
+
+	# change in nexthop group should not generate route notification
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 62/64"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv6 compat mode off - nexthop change"
+
+	# nexthop delete should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop del id 122"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv6 compat mode off - nexthop delete"
+
+	# set compat mode back on
+	sysctl_nexthop_compat_mode_set 1 "IPv6"
+}
+
+ipv4_compat_mode()
+{
+	local rc
+
+	echo
+	echo "IPv4 nexthop api compat mode"
+	echo "----------------------------"
+
+	sysctl_nexthop_compat_mode_check "IPv4"
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 21/22"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+	stop_ip_monitor $ipmout 3
+
+	# route add notification should contain expanded nexthops
+	log_test $? 0 "IPv4 compat mode on - route add notification"
+
+	# route dump should contain expanded nexthops
+	check_route "172.16.101.1" "172.16.101.1 nhid 122 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+	log_test $? 0 "IPv4 compat mode on - route dump"
+
+	# change in nexthop group should generate route notification
+	run_cmd "$IP nexthop add id 23 via 172.16.1.3 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 21/23"
+	stop_ip_monitor $ipmout 3
+	log_test $? 0 "IPv4 compat mode on - nexthop change"
+
+	sysctl_nexthop_compat_mode_set 0 "IPv4"
+
+	# cleanup
+	run_cmd "$IP ro del 172.16.101.1/32 nhid 122"
+
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+	stop_ip_monitor $ipmout 1
+	# route add notification should not contain expanded nexthops
+	log_test $? 0 "IPv4 compat mode off - route add notification"
+
+	# route dump should not contain expanded nexthops
+	check_route "172.16.101.1" "172.16.101.1 nhid 122"
+	log_test $? 0 "IPv4 compat mode off - route dump"
+
+	# change in nexthop group should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 21/22"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv4 compat mode off - nexthop change"
+
+	# nexthop delete should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop del id 122"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv4 compat mode off - nexthop delete"
+
+	sysctl_nexthop_compat_mode_set 1 "IPv4"
+}
+
+ipv4_del_add_loop1()
+{
+	while :; do
+		$IP nexthop del id 100
+		$IP nexthop add id 100 via 172.16.1.2 dev veth1
+	done >/dev/null 2>&1
+}
+
+ipv4_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101
+	done >/dev/null 2>&1
+}
+
+ipv4_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv4 runtime torture"
+	echo "--------------------"
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101"
+	run_cmd "$IP route add 172.16.101.1 nhid 102"
+	run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+	ipv4_del_add_loop1 &
+	pid1=$!
+	ipv4_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+	# if we did not crash, success
+	log_test 0 0 "IPv4 torture test"
+}
+
 basic()
 {
 	echo
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 155d48bd4d9e..f0e6be4c09e9 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -365,7 +365,9 @@ devlink_trap_group_stats_idle_test()
 devlink_trap_exception_test()
 {
 	local trap_name=$1; shift
-	local group_name=$1; shift
+	local group_name
+
+	group_name=$(devlink_trap_group_get $trap_name)
 
 	devlink_trap_stats_idle_test $trap_name
 	check_fail $? "Trap stats idle when packets should have been trapped"
@@ -377,9 +379,11 @@ devlink_trap_exception_test()
 devlink_trap_drop_test()
 {
 	local trap_name=$1; shift
-	local group_name=$1; shift
 	local dev=$1; shift
 	local handle=$1; shift
+	local group_name
+
+	group_name=$(devlink_trap_group_get $trap_name)
 
 	# This is the common part of all the tests. It checks that stats are
 	# initially idle, then non-idle after changing the trap action and
@@ -390,7 +394,6 @@ devlink_trap_drop_test()
 	devlink_trap_group_stats_idle_test $group_name
 	check_err $? "Trap group stats not idle with initial drop action"
 
-
 	devlink_trap_action_set $trap_name "trap"
 	devlink_trap_stats_idle_test $trap_name
 	check_fail $? "Trap stats idle after setting action to trap"
@@ -420,6 +423,29 @@ devlink_trap_drop_cleanup()
 	tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
 }
 
+devlink_trap_stats_test()
+{
+	local test_name=$1; shift
+	local trap_name=$1; shift
+	local send_one="$@"
+	local t0_packets
+	local t1_packets
+
+	RET=0
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	$send_one && sleep 1
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t1_packets -eq $t0_packets ]]; then
+		check_err 1 "Trap stats did not increase"
+	fi
+
+	log_test "$test_name"
+}
+
 devlink_trap_policers_num_get()
 {
 	devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 00797597fcf5..c33bfd7ba214 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -29,11 +29,9 @@ mirror_test()
 	local pref=$1; shift
 	local expect=$1; shift
 
-	local ping_timeout=$((PING_TIMEOUT * 5))
 	local t0=$(tc_rule_stats_get $dev $pref)
-	ip vrf exec $vrf_name \
-	   ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.5 -w $ping_timeout \
-		   &> /dev/null
+	$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+	    -c 10 -d 100ms -t icmp type=8
 	sleep 0.5
 	local t1=$(tc_rule_stats_get $dev $pref)
 	local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index b50081855913..55eeacf59241 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -20,10 +20,14 @@
 
 ALL_TESTS="
 	ping_ipv4
+	ping_ipv6
 	test_ip_dsfield
 	test_ip_dscp
 	test_ip_ecn
 	test_ip_dscp_ecn
+	test_ip6_dsfield
+	test_ip6_dscp
+	test_ip6_ecn
 "
 
 NUM_NETIFS=4
@@ -107,6 +111,11 @@ ping_ipv4()
 	ping_test $h1 192.0.2.2
 }
 
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
 do_test_pedit_dsfield_common()
 {
 	local pedit_locus=$1; shift
@@ -123,7 +132,12 @@ do_test_pedit_dsfield_common()
 	local pkts
 	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
 			tc_rule_handle_stats_get "dev $h2 ingress" 101)
-	check_err $? "Expected to get 10 packets, but got $pkts."
+	check_err $? "Expected to get 10 packets on test probe, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
 	log_test "$pedit_locus pedit $pedit_action"
 }
 
@@ -228,6 +242,63 @@ test_ip_dscp_ecn()
 	do_test_ip_dscp_ecn "dev $swp2 egress"
 }
 
+do_test_ip6_dsfield()
+{
+	local locus=$1; shift
+	local dsfield
+
+	for dsfield in 0 1 2 3 128 252 253 254 255; do
+		do_test_pedit_dsfield "$locus"				\
+				  "ip6 traffic_class set $dsfield"	\
+				  ipv6 "ip_tos $dsfield"		\
+				  "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_dsfield()
+{
+	do_test_ip6_dsfield "dev $swp1 ingress"
+	do_test_ip6_dsfield "dev $swp2 egress"
+}
+
+do_test_ip6_dscp()
+{
+	local locus=$1; shift
+	local dscp
+
+	for dscp in 0 1 2 3 32 61 62 63; do
+		do_test_pedit_dsfield "$locus"				       \
+			    "ip6 traffic_class set $((dscp << 2)) retain 0xfc" \
+			    ipv6 "ip_tos $(((dscp << 2) | 1))"		       \
+			    "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_dscp()
+{
+	do_test_ip6_dscp "dev $swp1 ingress"
+	do_test_ip6_dscp "dev $swp2 egress"
+}
+
+do_test_ip6_ecn()
+{
+	local locus=$1; shift
+	local ecn
+
+	for ecn in 0 1 2 3; do
+		do_test_pedit_dsfield "$locus"				\
+				"ip6 traffic_class set $ecn retain 0x3"	\
+				ipv6 "ip_tos $((124 | $ecn))"		\
+				"-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_ecn()
+{
+	do_test_ip6_ecn "dev $swp1 ingress"
+	do_test_ip6_ecn "dev $swp2 egress"
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 813d02d1939d..d9eca227136b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
-	mirred_egress_mirror_test gact_trap_test"
+	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+	gact_trap_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -50,6 +51,9 @@ switch_destroy()
 mirred_egress_test()
 {
 	local action=$1
+	local protocol=$2
+	local classifier=$3
+	local classifier_args=$4
 
 	RET=0
 
@@ -62,9 +66,9 @@ mirred_egress_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_fail $? "Matched without redirect rule inserted"
 
-	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action mirred egress $action \
-		dev $swp2
+	tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+		$classifier $tcflags $classifier_args \
+		action mirred egress $action dev $swp2
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
@@ -72,10 +76,11 @@ mirred_egress_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_err $? "Did not match incoming $action packet"
 
-	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+		$classifier
 	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
 
-	log_test "mirred egress $action ($tcflags)"
+	log_test "mirred egress $classifier $action ($tcflags)"
 }
 
 gact_drop_and_ok_test()
@@ -187,12 +192,17 @@ cleanup()
 
 mirred_egress_redirect_test()
 {
-	mirred_egress_test "redirect"
+	mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2"
 }
 
 mirred_egress_mirror_test()
 {
-	mirred_egress_test "mirror"
+	mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2"
+}
+
+matchall_mirred_egress_mirror_test()
+{
+	mirred_egress_test "mirror" "all" "matchall" ""
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
index c0c9ecb891e1..f9ed749fd8c7 100644
--- a/tools/testing/selftests/net/ip_defrag.c
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -192,9 +192,9 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
 	}
 
 	res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
-	if (res < 0)
+	if (res < 0 && errno != EPERM)
 		error(1, errno, "send_fragment");
-	if (res != frag_len)
+	if (res >= 0 && res != frag_len)
 		error(1, 0, "send_fragment: %d vs %d", res, frag_len);
 
 	frag_counter++;
@@ -313,9 +313,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr,
 			iphdr->ip_len = htons(frag_len);
 		}
 		res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
-		if (res < 0)
+		if (res < 0 && errno != EPERM)
 			error(1, errno, "sendto overlap: %d", frag_len);
-		if (res != frag_len)
+		if (res >= 0 && res != frag_len)
 			error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
 		frag_counter++;
 	}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 71a62e7e35b1..77c09cd339c3 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -67,6 +67,10 @@
 #	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
 #	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
 #
+# - pmtu_ipv{4,6}_ipv{4,6}_exception
+#	Same as pmtu_ipv4_vxlan4, but using a IPv4/IPv6 tunnel over IPv4/IPv6,
+#	instead of VXLAN
+#
 # - pmtu_vti4_exception
 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is not
@@ -151,6 +155,10 @@ tests="
 	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
 	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
 	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
+	pmtu_ipv4_ipv4_exception	IPv4 over IPv4: PMTU exceptions		1
+	pmtu_ipv6_ipv4_exception	IPv6 over IPv4: PMTU exceptions		1
+	pmtu_ipv4_ipv6_exception	IPv4 over IPv6: PMTU exceptions		1
+	pmtu_ipv6_ipv6_exception	IPv6 over IPv6: PMTU exceptions		1
 	pmtu_vti6_exception		vti6: PMTU exceptions			0
 	pmtu_vti4_exception		vti4: PMTU exceptions			0
 	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
@@ -363,6 +371,62 @@ setup_gue66() {
 	setup_fou_or_gue 6 6 gue
 }
 
+setup_ipvX_over_ipvY() {
+	inner=${1}
+	outer=${2}
+
+	if [ "${outer}" -eq 4 ]; then
+		a_addr="${prefix4}.${a_r1}.1"
+		b_addr="${prefix4}.${b_r1}.1"
+		if [ "${inner}" -eq 4 ]; then
+			type="ipip"
+			mode="ipip"
+		else
+			type="sit"
+			mode="ip6ip"
+		fi
+	else
+		a_addr="${prefix6}:${a_r1}::1"
+		b_addr="${prefix6}:${b_r1}::1"
+		type="ip6tnl"
+		if [ "${inner}" -eq 4 ]; then
+			mode="ipip6"
+		else
+			mode="ip6ip6"
+		fi
+	fi
+
+	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
+
+	run_cmd ${ns_a} ip link set ip_a up
+	run_cmd ${ns_b} ip link set ip_b up
+
+	if [ "${inner}" = "4" ]; then
+		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
+		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
+	else
+		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
+		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
+	fi
+}
+
+setup_ip4ip4() {
+	setup_ipvX_over_ipvY 4 4
+}
+
+setup_ip6ip4() {
+	setup_ipvX_over_ipvY 6 4
+}
+
+setup_ip4ip6() {
+	setup_ipvX_over_ipvY 4 6
+}
+
+setup_ip6ip6() {
+	setup_ipvX_over_ipvY 6 6
+}
+
 setup_namespaces() {
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns add ${n} || return 1
@@ -908,6 +972,64 @@ test_pmtu_ipv6_gue6_exception() {
 	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
 }
 
+test_pmtu_ipvX_over_ipvY_exception() {
+	inner=${1}
+	outer=${2}
+	ll_mtu=4000
+
+	setup namespaces routing ip${inner}ip${outer} || return 2
+
+	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
+
+	if [ ${inner} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	if [ ${outer} -eq 4 ]; then
+		#                      IPv4 header
+		exp_mtu=$((${ll_mtu} - 20))
+	else
+		#                      IPv6 header   Option 4
+		exp_mtu=$((${ll_mtu} - 40          - 8))
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
+	mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
+}
+
+test_pmtu_ipv4_ipv4_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 4 4
+}
+
+test_pmtu_ipv6_ipv4_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 6 4
+}
+
+test_pmtu_ipv4_ipv6_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 4 6
+}
+
+test_pmtu_ipv6_ipv6_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 6 6
+}
+
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0ea44d975b6c..c5282e62df75 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -101,6 +101,21 @@ FIXTURE(tls)
 	bool notls;
 };
 
+FIXTURE_VARIANT(tls)
+{
+	unsigned int tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls, 12)
+{
+	.tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13)
+{
+	.tls_version = TLS_1_3_VERSION,
+};
+
 FIXTURE_SETUP(tls)
 {
 	struct tls12_crypto_info_aes_gcm_128 tls12;
@@ -112,7 +127,7 @@ FIXTURE_SETUP(tls)
 	len = sizeof(addr);
 
 	memset(&tls12, 0, sizeof(tls12));
-	tls12.info.version = TLS_1_3_VERSION;
+	tls12.info.version = variant->tls_version;
 	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
 
 	addr.sin_family = AF_INET;
@@ -733,7 +748,7 @@ TEST_F(tls, bidir)
 		struct tls12_crypto_info_aes_gcm_128 tls12;
 
 		memset(&tls12, 0, sizeof(tls12));
-		tls12.info.version = TLS_1_3_VERSION;
+		tls12.info.version = variant->tls_version;
 		tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
 
 		ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
@@ -1258,78 +1273,4 @@ TEST(keysizes) {
 	close(cfd);
 }
 
-TEST(tls12) {
-	int fd, cfd;
-	bool notls;
-
-	struct tls12_crypto_info_aes_gcm_128 tls12;
-	struct sockaddr_in addr;
-	socklen_t len;
-	int sfd, ret;
-
-	notls = false;
-	len = sizeof(addr);
-
-	memset(&tls12, 0, sizeof(tls12));
-	tls12.info.version = TLS_1_2_VERSION;
-	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
-
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
-
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
-
-	ret = bind(sfd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-	ret = listen(sfd, 10);
-	ASSERT_EQ(ret, 0);
-
-	ret = getsockname(sfd, &addr, &len);
-	ASSERT_EQ(ret, 0);
-
-	ret = connect(fd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-
-	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
-	}
-
-	if (!notls) {
-		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
-		ASSERT_EQ(ret, 0);
-	}
-
-	cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(cfd, 0);
-
-	if (!notls) {
-		ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		ASSERT_EQ(ret, 0);
-
-		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
-		ASSERT_EQ(ret, 0);
-	}
-
-	close(sfd);
-
-	char const *test_str = "test_read";
-	int send_len = 10;
-	char buf[10];
-
-	send_len = strlen(test_str) + 1;
-	EXPECT_EQ(send(fd, test_str, send_len, 0), send_len);
-	EXPECT_NE(recv(cfd, buf, send_len, 0), -1);
-	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
-
-	close(fd);
-	close(cfd);
-}
-
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
new file mode 100755
index 000000000000..184da81f554f
--- /dev/null
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Various combinations of VRF with xfrms and qdisc.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+HOST1_4=192.168.1.1
+HOST2_4=192.168.1.2
+HOST1_6=2001:db8:1::1
+HOST2_6=2001:db8:1::2
+
+XFRM1_4=10.0.1.1
+XFRM2_4=10.0.1.2
+XFRM1_6=fc00:1000::1
+XFRM2_6=fc00:1000::2
+IF_ID=123
+
+VRF=red
+TABLE=300
+
+AUTH_1=0xd94fcfea65fddf21dc6e0d24a0253508
+AUTH_2=0xdc6e0d24a0253508d94fcfea65fddf21
+ENC_1=0xfc46c20f8048be9725930ff3fb07ac2a91f0347dffeacf62
+ENC_2=0x3fb07ac2a91f0347dffeacf62fc46c20f8048be9725930ff
+SPI_1=0x02122b77
+SPI_2=0x2b770212
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+run_cmd_host1()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval ip netns exec host1 $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" ]; then
+		if [ -n "$out" ]; then
+			echo
+			echo "    $out"
+		fi
+		echo
+	fi
+
+	return $rc
+}
+
+################################################################################
+# create namespaces for hosts and sws
+
+create_vrf()
+{
+	local ns=$1
+	local vrf=$2
+	local table=$3
+
+	if [ -n "${ns}" ]; then
+		ns="-netns ${ns}"
+	fi
+
+	ip ${ns} link add ${vrf} type vrf table ${table}
+	ip ${ns} link set ${vrf} up
+	ip ${ns} route add vrf ${vrf} unreachable default metric 8192
+	ip ${ns} -6 route add vrf ${vrf} unreachable default metric 8192
+
+	ip ${ns} addr add 127.0.0.1/8 dev ${vrf}
+	ip ${ns} -6 addr add ::1 dev ${vrf} nodad
+
+	ip ${ns} ru del pref 0
+	ip ${ns} ru add pref 32765 from all lookup local
+	ip ${ns} -6 ru del pref 0
+	ip ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+create_ns()
+{
+	local ns=$1
+	local addr=$2
+	local addr6=$3
+
+	[ -z "${addr}" ] && addr="-"
+	[ -z "${addr6}" ] && addr6="-"
+
+	ip netns add ${ns}
+
+	ip -netns ${ns} link set lo up
+	if [ "${addr}" != "-" ]; then
+		ip -netns ${ns} addr add dev lo ${addr}
+	fi
+	if [ "${addr6}" != "-" ]; then
+		ip -netns ${ns} -6 addr add dev lo ${addr6}
+	fi
+
+	ip -netns ${ns} ro add unreachable default metric 8192
+	ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+	ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+	local ns1=$1
+	local ns1_dev=$2
+	local ns1_addr=$3
+	local ns1_addr6=$4
+	local ns2=$5
+	local ns2_dev=$6
+	local ns2_addr=$7
+	local ns2_addr6=$8
+	local ns1arg
+	local ns2arg
+
+	if [ -n "${ns1}" ]; then
+		ns1arg="-netns ${ns1}"
+	fi
+	if [ -n "${ns2}" ]; then
+		ns2arg="-netns ${ns2}"
+	fi
+
+	ip ${ns1arg} li add ${ns1_dev} type veth peer name tmp
+	ip ${ns1arg} li set ${ns1_dev} up
+	ip ${ns1arg} li set tmp netns ${ns2} name ${ns2_dev}
+	ip ${ns2arg} li set ${ns2_dev} up
+
+	if [ "${ns1_addr}" != "-" ]; then
+		ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr}
+		ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr}
+	fi
+
+	if [ "${ns1_addr6}" != "-" ]; then
+		ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr6} nodad
+		ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr6} nodad
+	fi
+}
+
+################################################################################
+
+cleanup()
+{
+	ip netns del host1
+	ip netns del host2
+}
+
+setup()
+{
+	create_ns "host1"
+	create_ns "host2"
+
+	connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+	           "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+
+	create_vrf "host1" ${VRF} ${TABLE}
+	ip -netns host1 link set dev eth0 master ${VRF}
+}
+
+cleanup_xfrm()
+{
+	for ns in host1 host2
+	do
+		for x in state policy
+		do
+			ip -netns ${ns} xfrm ${x} flush
+			ip -6 -netns ${ns} xfrm ${x} flush
+		done
+	done
+}
+
+setup_xfrm()
+{
+	local h1_4=$1
+	local h2_4=$2
+	local h1_6=$3
+	local h2_6=$4
+	local devarg="$5"
+
+	#
+	# policy
+	#
+
+	# host1 - IPv4 out
+	ip -netns host1 xfrm policy add \
+	  src ${h1_4} dst ${h2_4} ${devarg} dir out \
+	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+	# host2 - IPv4 in
+	ip -netns host2 xfrm policy add \
+	  src ${h1_4} dst ${h2_4} dir in \
+	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+	# host1 - IPv4 in
+	ip -netns host1 xfrm policy add \
+	  src ${h2_4} dst ${h1_4} ${devarg} dir in \
+	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+	# host2 - IPv4 out
+	ip -netns host2 xfrm policy add \
+	  src ${h2_4} dst ${h1_4} dir out \
+	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+
+	# host1 - IPv6 out
+	ip -6 -netns host1 xfrm policy add \
+	  src ${h1_6} dst ${h2_6} ${devarg} dir out \
+	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+	# host2 - IPv6 in
+	ip -6 -netns host2 xfrm policy add \
+	  src ${h1_6} dst ${h2_6} dir in \
+	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+	# host1 - IPv6 in
+	ip -6 -netns host1 xfrm policy add \
+	  src ${h2_6} dst ${h1_6} ${devarg} dir in \
+	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+	# host2 - IPv6 out
+	ip -6 -netns host2 xfrm policy add \
+	  src ${h2_6} dst ${h1_6} dir out \
+	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+	#
+	# state
+	#
+	ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_4} dst ${h2_4} ${devarg}
+
+	ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_4} dst ${h2_4}
+
+
+	ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_4} dst ${h1_4} ${devarg}
+
+	ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_4} dst ${h1_4}
+
+
+	ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_6} dst ${h2_6} ${devarg}
+
+	ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_6} dst ${h2_6}
+
+
+	ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_6} dst ${h1_6} ${devarg}
+
+	ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_6} dst ${h1_6}
+}
+
+cleanup_xfrm_dev()
+{
+	ip -netns host1 li del xfrm0
+	ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
+	ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+}
+
+setup_xfrm_dev()
+{
+	local vrfarg="vrf ${VRF}"
+
+	ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
+	ip -netns host1 li set xfrm0 ${vrfarg} up
+	ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
+	ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+
+	ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
+	ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+
+	setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
+}
+
+run_tests()
+{
+	cleanup_xfrm
+
+	# no IPsec
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	log_test $? 0 "IPv4 no xfrm policy"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 no xfrm policy"
+
+	# xfrm without VRF in sel
+	setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6}
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	log_test $? 0 "IPv4 xfrm policy based on address"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 xfrm policy based on address"
+	cleanup_xfrm
+
+	# xfrm with VRF in sel
+	# Known failure: ipv4 resets the flow oif after the lookup. Fix is
+	# not straightforward.
+	# setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev ${VRF}"
+	# run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	# log_test $? 0 "IPv4 xfrm policy with VRF in selector"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 xfrm policy with VRF in selector"
+	cleanup_xfrm
+
+	# xfrm with enslaved device in sel
+	# Known failures: combined with the above, __xfrm{4,6}_selector_match
+	# needs to consider both l3mdev and enslaved device index.
+	# setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev eth0"
+	# run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	# log_test $? 0 "IPv4 xfrm policy with enslaved device in selector"
+	# run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	# log_test $? 0 "IPv6 xfrm policy with enslaved device in selector"
+	# cleanup_xfrm
+
+	# xfrm device
+	setup_xfrm_dev
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${XFRM2_4}
+	log_test $? 0 "IPv4 xfrm policy with xfrm device"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${XFRM2_6}
+	log_test $? 0 "IPv6 xfrm policy with xfrm device"
+	cleanup_xfrm_dev
+}
+
+################################################################################
+# usage
+
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+        -p          Pause on fail
+        -v          verbose mode (show commands and output)
+
+done
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+cleanup 2>/dev/null
+setup
+
+echo
+echo "No qdisc on VRF device"
+run_tests
+
+run_cmd_host1 tc qdisc add dev ${VRF} root netem delay 100ms
+echo
+echo "netem qdisc on VRF device"
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 9c60337317c6..020137b61407 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -241,7 +241,7 @@ function get_files_count()
 	split_remote $LOC
 
 	if [[ "$REMOTE" == "" ]]; then
-		echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l)
+		echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
 	else
 		echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
 		       wc -l" 2> /dev/null)
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 2d4db5afb142..973198a3ec3d 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -5,3 +5,4 @@ pidfd_test
 pidfd_wait
 pidfd_fdinfo_test
 pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 75a545861375..f4a2f28f926b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
 CFLAGS += -g -I../../../../usr/include/ -pthread
 
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+	pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
new file mode 100644
index 000000000000..bb11de90c0c9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
new file mode 100644
index 000000000000..133ec5b6cda8
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+enum {
+	PIDFD_NS_USER,
+	PIDFD_NS_MNT,
+	PIDFD_NS_PID,
+	PIDFD_NS_UTS,
+	PIDFD_NS_IPC,
+	PIDFD_NS_NET,
+	PIDFD_NS_CGROUP,
+	PIDFD_NS_PIDCLD,
+	PIDFD_NS_MAX
+};
+
+const struct ns_info {
+	const char *name;
+	int flag;
+} ns_info[] = {
+	[PIDFD_NS_USER]   = { "user",             CLONE_NEWUSER,   },
+	[PIDFD_NS_MNT]    = { "mnt",              CLONE_NEWNS,     },
+	[PIDFD_NS_PID]    = { "pid",              CLONE_NEWPID,    },
+	[PIDFD_NS_UTS]    = { "uts",              CLONE_NEWUTS,    },
+	[PIDFD_NS_IPC]    = { "ipc",              CLONE_NEWIPC,    },
+	[PIDFD_NS_NET]    = { "net",              CLONE_NEWNET,    },
+	[PIDFD_NS_CGROUP] = { "cgroup",           CLONE_NEWCGROUP, },
+	[PIDFD_NS_PIDCLD] = { "pid_for_children", 0,               },
+};
+
+FIXTURE(current_nsset)
+{
+	pid_t pid;
+	int pidfd;
+	int nsfds[PIDFD_NS_MAX];
+
+	pid_t child_pid_exited;
+	int child_pidfd_exited;
+
+	pid_t child_pid1;
+	int child_pidfd1;
+	int child_nsfds1[PIDFD_NS_MAX];
+
+	pid_t child_pid2;
+	int child_pidfd2;
+	int child_nsfds2[PIDFD_NS_MAX];
+};
+
+static int sys_waitid(int which, pid_t pid, int options)
+{
+	return syscall(__NR_waitid, which, pid, NULL, options, NULL);
+}
+
+pid_t create_child(int *pidfd, unsigned flags)
+{
+	struct clone_args args = {
+		.flags		= CLONE_PIDFD | flags,
+		.exit_signal	= SIGCHLD,
+		.pidfd		= ptr_to_u64(pidfd),
+	};
+
+	return sys_clone3(&args, sizeof(struct clone_args));
+}
+
+FIXTURE_SETUP(current_nsset)
+{
+	int i, proc_fd, ret;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		self->nsfds[i]		= -EBADF;
+		self->child_nsfds1[i]	= -EBADF;
+		self->child_nsfds2[i]	= -EBADF;
+	}
+
+	proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+	ASSERT_GE(proc_fd, 0) {
+		TH_LOG("%m - Failed to open /proc/self/ns");
+	}
+
+	self->pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+		if (self->nsfds[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->name, self->pid);
+			}
+		}
+	}
+
+	self->pidfd = sys_pidfd_open(self->pid, 0);
+	EXPECT_GT(self->pidfd, 0) {
+		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+	}
+
+	/* Create task that exits right away. */
+	self->child_pid_exited = create_child(&self->child_pidfd_exited,
+					      CLONE_NEWUSER | CLONE_NEWNET);
+	EXPECT_GT(self->child_pid_exited, 0);
+
+	if (self->child_pid_exited == 0)
+		_exit(EXIT_SUCCESS);
+
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+	self->pidfd = sys_pidfd_open(self->pid, 0);
+	EXPECT_GE(self->pidfd, 0) {
+		TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+	}
+
+	/* Create tasks that will be stopped. */
+	self->child_pid1 = create_child(&self->child_pidfd1,
+					CLONE_NEWUSER | CLONE_NEWNS |
+					CLONE_NEWCGROUP | CLONE_NEWIPC |
+					CLONE_NEWUTS | CLONE_NEWPID |
+					CLONE_NEWNET);
+	EXPECT_GE(self->child_pid1, 0);
+
+	if (self->child_pid1 == 0) {
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	self->child_pid2 = create_child(&self->child_pidfd2,
+					CLONE_NEWUSER | CLONE_NEWNS |
+					CLONE_NEWCGROUP | CLONE_NEWIPC |
+					CLONE_NEWUTS | CLONE_NEWPID |
+					CLONE_NEWNET);
+	EXPECT_GE(self->child_pid2, 0);
+
+	if (self->child_pid2 == 0) {
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		char p[100];
+
+		const struct ns_info *info = &ns_info[i];
+
+		self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+		if (self->nsfds[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->name, self->pid);
+			}
+		}
+
+		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+			       self->child_pid1, info->name);
+		EXPECT_GT(ret, 0);
+		EXPECT_LT(ret, sizeof(p));
+
+		self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
+		if (self->child_nsfds1[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->name, self->child_pid1);
+			}
+		}
+
+		ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+			       self->child_pid2, info->name);
+		EXPECT_GT(ret, 0);
+		EXPECT_LT(ret, sizeof(p));
+
+		self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
+		if (self->child_nsfds2[i] < 0) {
+			EXPECT_EQ(errno, ENOENT) {
+				TH_LOG("%m - Failed to open %s namespace for process %d",
+				       info->name, self->child_pid1);
+			}
+		}
+	}
+
+	close(proc_fd);
+}
+
+FIXTURE_TEARDOWN(current_nsset)
+{
+	int i;
+
+	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
+					SIGKILL, NULL, 0), 0);
+	ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
+					SIGKILL, NULL, 0), 0);
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		if (self->nsfds[i] >= 0)
+			close(self->nsfds[i]);
+		if (self->child_nsfds1[i] >= 0)
+			close(self->child_nsfds1[i]);
+		if (self->child_nsfds2[i] >= 0)
+			close(self->child_nsfds2[i]);
+	}
+
+	if (self->child_pidfd1 >= 0)
+		EXPECT_EQ(0, close(self->child_pidfd1));
+	if (self->child_pidfd2 >= 0)
+		EXPECT_EQ(0, close(self->child_pidfd2));
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
+	ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+}
+
+static int preserve_ns(const int pid, const char *ns)
+{
+	int ret;
+	char path[50];
+
+	ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
+	if (ret < 0 || (size_t)ret >= sizeof(path))
+		return -EIO;
+
+	return open(path, O_RDONLY | O_CLOEXEC);
+}
+
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+	int ns_fd2 = -EBADF;
+	int ret = -1;
+	struct stat ns_st1, ns_st2;
+
+	ret = fstat(ns_fd1, &ns_st1);
+	if (ret < 0)
+		return -1;
+
+	ns_fd2 = preserve_ns(pid2, ns);
+	if (ns_fd2 < 0)
+		return -1;
+
+	ret = fstat(ns_fd2, &ns_st2);
+	close(ns_fd2);
+	if (ret < 0)
+		return -1;
+
+	/* processes are in the same namespace */
+	if ((ns_st1.st_dev == ns_st2.st_dev) &&
+	    (ns_st1.st_ino == ns_st2.st_ino))
+		return 1;
+
+	/* processes are in different namespaces */
+	return 0;
+}
+
+/* Test that we can't pass garbage to the kernel. */
+TEST_F(current_nsset, invalid_flags)
+{
+	ASSERT_NE(setns(self->pidfd, 0), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, -1), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+	EXPECT_EQ(errno, EINVAL);
+
+	ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+/* Test that we can't attach to a task that has already exited. */
+TEST_F(current_nsset, pidfd_exited_child)
+{
+	int i;
+	pid_t pid;
+
+	ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
+		  0);
+	EXPECT_EQ(errno, ESRCH);
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		/* Verify that we haven't changed any namespaces. */
+		if (self->nsfds[i] >= 0)
+			ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
+	}
+}
+
+TEST_F(current_nsset, pidfd_incremental_setns)
+{
+	int i;
+	pid_t pid;
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		int nsfd;
+
+		if (self->child_nsfds1[i] < 0)
+			continue;
+
+		if (info->flag) {
+			ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
+				TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
+				       info->name, self->child_pid1,
+				       self->child_pidfd1);
+			}
+		}
+
+		/* Verify that we have changed to the correct namespaces. */
+		if (info->flag == CLONE_NEWPID)
+			nsfd = self->nsfds[i];
+		else
+			nsfd = self->child_nsfds1[i];
+		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+			TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
+			       info->name, self->child_pid1,
+			       self->child_pidfd1);
+		}
+		TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
+		       info->name, self->child_pid1, self->child_pidfd1);
+	}
+}
+
+TEST_F(current_nsset, nsfd_incremental_setns)
+{
+	int i;
+	pid_t pid;
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		int nsfd;
+
+		if (self->child_nsfds1[i] < 0)
+			continue;
+
+		if (info->flag) {
+			ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
+				TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+				       info->name, self->child_pid1,
+				       self->child_nsfds1[i]);
+			}
+		}
+
+		/* Verify that we have changed to the correct namespaces. */
+		if (info->flag == CLONE_NEWPID)
+			nsfd = self->nsfds[i];
+		else
+			nsfd = self->child_nsfds1[i];
+		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+			TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+			       info->name, self->child_pid1,
+			       self->child_nsfds1[i]);
+		}
+		TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+		       info->name, self->child_pid1, self->child_nsfds1[i]);
+	}
+}
+
+TEST_F(current_nsset, pidfd_one_shot_setns)
+{
+	unsigned flags = 0;
+	int i;
+	pid_t pid;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_nsfds1[i] < 0)
+			continue;
+
+		flags |= info->flag;
+		TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+		       info->name, self->child_pid1);
+	}
+
+	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+		TH_LOG("%m - Failed to setns to namespaces of %d",
+		       self->child_pid1);
+	}
+
+	pid = getpid();
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+		int nsfd;
+
+		if (self->child_nsfds1[i] < 0)
+			continue;
+
+		/* Verify that we have changed to the correct namespaces. */
+		if (info->flag == CLONE_NEWPID)
+			nsfd = self->nsfds[i];
+		else
+			nsfd = self->child_nsfds1[i];
+		ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+			TH_LOG("setns failed to place us correctly into %s namespace of %d",
+			       info->name, self->child_pid1);
+		}
+		TH_LOG("Managed to correctly setns to %s namespace of %d",
+		       info->name, self->child_pid1);
+	}
+}
+
+TEST_F(current_nsset, no_foul_play)
+{
+	unsigned flags = 0;
+	int i;
+
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_nsfds1[i] < 0)
+			continue;
+
+		flags |= info->flag;
+		if (info->flag) /* No use logging pid_for_children. */
+			TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+			       info->name, self->child_pid1);
+	}
+
+	ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+		TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
+		       self->child_pid1, self->child_pidfd1);
+	}
+
+	/*
+	 * Can't setns to a user namespace outside of our hierarchy since we
+	 * don't have caps in there and didn't create it. That means that under
+	 * no circumstances should we be able to setns to any of the other
+	 * ones since they aren't owned by our user namespace.
+	 */
+	for (i = 0; i < PIDFD_NS_MAX; i++) {
+		const struct ns_info *info = &ns_info[i];
+
+		if (self->child_nsfds2[i] < 0 || !info->flag)
+			continue;
+
+		ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
+			TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
+			       info->name, self->child_pid2,
+			       self->child_pidfd2);
+		}
+		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
+		       info->name, self->child_pid2,
+		       self->child_pidfd2);
+
+		ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
+			TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+			       info->name, self->child_pid2,
+			       self->child_nsfds2[i]);
+		}
+		TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+		       info->name, self->child_pid2,
+		       self->child_nsfds2[i]);
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 644770c3b754..0830e63818c1 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -19,6 +19,7 @@ SUB_DIRS = alignment		\
 	   copyloops		\
 	   dscr			\
 	   mm			\
+	   nx-gzip		\
 	   pmu			\
 	   signal		\
 	   primitives		\
diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 000000000000..5a7118495cb3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..640fad6cc2c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -O3 -m64 -I./include
+
+TEST_GEN_FILES := gzfht_test gunz_test
+TEST_PROGS := nx-gzip-test.sh
+
+include ../../lib.mk
+
+$(TEST_GEN_FILES): gzip_vas.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 000000000000..9809dbaa1905
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,45 @@
+Test the nx-gzip function:
+=========================
+
+Verify that following device exists:
+  /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+   sudo chmod go+rw /dev/crypto/nx-gzip
+However, chmod may not survive across boots. You may create a udev file such
+as:
+   /etc/udev/rules.d/99-nx-gzip.rules
+
+
+To manually build and run:
+$ gcc -O3 -I./include -o gzfht_test gzfht_test.c gzip_vas.c
+$ gcc -O3 -I./include -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 6413 bytes
+compressed 6413 to 3124 bytes total, crc32 checksum = abd15e8a
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum abd15e8a isize 0000190d
+stored   checksum abd15e8a isize 0000190d
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6  gzip_vas.c.nx.gz.nx.gunzip
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6  gzip_vas.c
+
+
+Note that the code here are intended for testing the nx-gzip hardware function.
+They are not intended for demonstrating performance or compression ratio.
+By being simplistic these selftests expect to allocate the entire set of source
+and target pages in the memory so it needs enough memory to work.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 000000000000..6ee0fded0391
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface.  Not intended for productive uses or for performance or
+ * compression ratio measurements.  Note also that /dev/crypto/gzip,
+ * VAS and skiboot support are required
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here.  See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE	// For aligned_alloc()
+#define _DEFAULT_SOURCE	// For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/* fifo queue management */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* amount of free bytes in the first and last parts */
+#define fifo_free_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? (cur) : (len)-(used))
+/* amount of used bytes in the first and last parts */
+#define fifo_used_first_bytes(cur, used, len)  ((((cur)+(used)) <= (len)) \
+						  ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len)   ((((cur)+(used)) <= (len)) \
+						  ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used)      ((cur)+(used))
+#define fifo_free_last_offset(cur, used, len)  \
+					   fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur)            (cur)
+#define fifo_used_last_offset(cur)             (0)
+
+const int fifo_in_len = 1<<24;
+const int fifo_out_len = 1<<24;
+const int page_sz = 1<<16;
+const int line_sz = 1<<7;
+const int window_max = 1<<15;
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde.  ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair.  For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0].  Returns the total number of
+ * bytes in ddl.  Caller is responsible for allocting the array of
+ * nx_dde_t *ddl.  If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+					uint32_t len)
+{
+	uint32_t ddecnt;
+	uint32_t bytes;
+
+	if (addr == NULL && len == 0) {
+		clearp_dde(ddl);
+		return 0;
+	}
+
+	NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
+			__func__, len));
+
+	/* Number of ddes in the dde list ; == 0 when it is a direct dde */
+	ddecnt = getpnn(ddl, dde_count);
+	bytes = getp32(ddl, ddebc);
+
+	if (ddecnt == 0 && bytes == 0) {
+		/* First dde is unused; make it a direct dde */
+		bytes = len;
+		putp32(ddl, ddebc, bytes);
+		putp64(ddl, ddead, (uint64_t) addr);
+	} else if (ddecnt == 0) {
+		/* Converting direct to indirect dde
+		 * ddl[0] becomes head dde of ddl
+		 * copy direct to indirect first.
+		 */
+		ddl[1] = ddl[0];
+
+		/* Add the new dde next */
+		clear_dde(ddl[2]);
+		put32(ddl[2], ddebc, len);
+		put64(ddl[2], ddead, (uint64_t) addr);
+
+		/* Ddl head points to 2 direct ddes */
+		ddecnt = 2;
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes);
+		/* Pointer to the first direct dde */
+		putp64(ddl, ddead, (uint64_t) &ddl[1]);
+	} else {
+		/* Append a dde to an existing indirect ddl */
+		++ddecnt;
+		clear_dde(ddl[ddecnt]);
+		put64(ddl[ddecnt], ddead, (uint64_t) addr);
+		put32(ddl[ddecnt], ddebc, len);
+
+		putpnn(ddl, dde_count, ddecnt);
+		bytes = bytes + len;
+		putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+	}
+	return bytes;
+}
+
+/*
+ * Touch specified number of pages represented in number bytes
+ * beginning from the first buffer in a dde list.
+ * Do not touch the pages past buf_sz-th byte's page.
+ *
+ * Set buf_sz = 0 to touch all pages described by the ddep.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+				int wr)
+{
+	uint32_t indirect_count;
+	uint32_t buf_len;
+	long total;
+	uint64_t buf_addr;
+	struct nx_dde_t *dde_list;
+	int i;
+
+	assert(!!ddep);
+
+	indirect_count = getpnn(ddep, dde_count);
+
+	NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+			indirect_count));
+	NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+	if (indirect_count == 0) {
+		/* Direct dde */
+		buf_len = getp32(ddep, ddebc);
+		buf_addr = getp64(ddep, ddead);
+
+		NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+				buf_len, (void *)buf_addr));
+
+		if (buf_sz == 0)
+			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+		else
+			nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+					buf_sz), page_sz, wr);
+
+		return ERR_NX_OK;
+	}
+
+	/* Indirect dde */
+	if (indirect_count > MAX_DDE_COUNT)
+		return ERR_NX_EXCESSIVE_DDE;
+
+	/* First address of the list */
+	dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+	if (buf_sz == 0)
+		buf_sz = getp32(ddep, ddebc);
+
+	total = 0;
+	for (i = 0; i < indirect_count; i++) {
+		buf_len = get32(dde_list[i], ddebc);
+		buf_addr = get64(dde_list[i], ddead);
+		total += buf_len;
+
+		NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+				buf_len, (void *)buf_addr));
+		NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+		/* Touching fewer pages than encoded in the ddebc */
+		if (total > buf_sz) {
+			buf_len = NX_MIN(buf_len, total - buf_sz);
+			nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+			NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+				      buf_len));
+			NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+			break;
+		}
+		nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+	}
+	return ERR_NX_OK;
+}
+
+/*
+ * Src and dst buffers are supplied in scatter gather lists.
+ * NX function code and other parameters supplied in cmdp.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+			 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	uint64_t csbaddr;
+
+	memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	cmdp->crb.source_dde = *src;
+	cmdp->crb.target_dde = *dst;
+
+	/* Status, output byte count in tpbc */
+	csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
+	put64(cmdp->crb, csb_address, csbaddr);
+
+	/* NX reports input bytes in spbc; cleared */
+	cmdp->cpb.out_spbc_comp_wrap = 0;
+	cmdp->cpb.out_spbc_comp_with_count = 0;
+	cmdp->cpb.out_spbc_decomp = 0;
+
+	/* Clear output */
+	put32(cmdp->cpb, out_crc, INIT_CRC);
+	put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+	/* Submit the crb, the job descriptor, to the accelerator. */
+	return nxu_submit_job(cmdp, handle);
+}
+
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+	FILE *inpf = NULL;
+	FILE *outf = NULL;
+
+	int c, expect, i, cc, rc = 0;
+	char gzfname[FNAME_MAX];
+
+	/* Queuing, file ops, byte counting */
+	char *fifo_in, *fifo_out;
+	int used_in, cur_in, used_out, cur_out, read_sz, n;
+	int first_free, last_free, first_used, last_used;
+	int first_offset, last_offset;
+	int write_sz, free_space, source_sz;
+	int source_sz_estimate, target_sz_estimate;
+	uint64_t last_comp_ratio = 0; /* 1000 max */
+	uint64_t total_out = 0;
+	int is_final, is_eof;
+
+	/* nx hardware */
+	int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+	int history_len = 0;
+	struct nx_gzip_crb_cpb_t cmd, *cmdp;
+	struct nx_dde_t *ddl_in;
+	struct nx_dde_t dde_in[6] __aligned(128);
+	struct nx_dde_t *ddl_out;
+	struct nx_dde_t dde_out[6] __aligned(128);
+	int pgfault_retries;
+
+	/* when using mmap'ed files */
+	off_t input_file_offset;
+
+	if (argc > 2) {
+		fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+		fprintf(stderr, "    writes to stdout or <fname>.nx.gunzip\n");
+		return -1;
+	}
+
+	if (argc == 1) {
+		inpf = stdin;
+		outf = stdout;
+	} else if (argc == 2) {
+		char w[1024];
+		char *wp;
+
+		inpf = fopen(argv[1], "r");
+		if (inpf == NULL) {
+			perror(argv[1]);
+			return -1;
+		}
+
+		/* Make a new file name to write to.  Ignoring '.gz' */
+		wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
+		strcpy(w, wp);
+		strcat(w, ".nx.gunzip");
+
+		outf = fopen(w, "w");
+		if (outf == NULL) {
+			perror(w);
+			return -1;
+		}
+	}
+
+	/* Decode the gzip header */
+	c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+	if (c != expect)
+		goto err1;
+
+	c = GETINPC(inpf); expect = 0x08; /* CM */
+	if (c != expect)
+		goto err1;
+
+	int flg = GETINPC(inpf); /* FLG */
+
+	if (flg & 0xE0 || flg & 0x4 || flg == EOF)
+		goto err2;
+
+	fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+	/* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+	 * sample code.
+	 */
+	for (i = 0; i < 6; i++) {
+		char tmp[10];
+
+		tmp[i] = GETINPC(inpf);
+		if (tmp[i] == EOF)
+			goto err3;
+		fprintf(stderr, "%02x ", tmp[i]);
+		if (i == 5)
+			fprintf(stderr, "\n");
+	}
+	fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+	/* FNAME */
+	if (flg & 0x8) {
+		int k = 0;
+
+		do {
+			c = GETINPC(inpf);
+			if (c == EOF || k >= FNAME_MAX)
+				goto err3;
+			gzfname[k++] = c;
+		} while (c);
+		fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+	}
+
+	/* FHCRC */
+	if (flg & 0x2) {
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		c = GETINPC(inpf);
+		if (c == EOF)
+			goto err3;
+		fprintf(stderr, "gzHeader FHCRC: ignored\n");
+	}
+
+	used_in = cur_in = used_out = cur_out = 0;
+	is_final = is_eof = 0;
+
+	/* Allocate one page larger to prevent page faults due to NX
+	 * overfetching.
+	 * Either do this (char*)(uintptr_t)aligned_alloc or use
+	 * -std=c11 flag to make the int-to-pointer warning go away.
+	 */
+	assert((fifo_in  = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_in_len + page_sz)) != NULL);
+	assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
+				   fifo_out_len + page_sz + line_sz)) != NULL);
+	/* Leave unused space due to history rounding rules */
+	fifo_out = fifo_out + line_sz;
+	nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+	ddl_in  = &dde_in[0];
+	ddl_out = &dde_out[0];
+	cmdp = &cmd;
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+	/* Read from .gz file */
+
+	NXPRT(fprintf(stderr, "read_state:\n"));
+
+	if (is_eof != 0)
+		goto write_state;
+
+	/* We read in to fifo_in in two steps: first: read in to from
+	 * cur_in to the end of the buffer.  last: if free space wrapped
+	 * around, read from fifo_in offset 0 to offset cur_in.
+	 */
+
+	/* Reset fifo head to reduce unnecessary wrap arounds */
+	cur_in = (used_in == 0) ? 0 : cur_in;
+
+	/* Free space total is reduced by a gap */
+	free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+			    - line_sz);
+
+	/* Free space may wrap around as first and last */
+	first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+	last_free  = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+	/* Start offsets of the free memory */
+	first_offset = fifo_free_first_offset(cur_in, used_in);
+	last_offset  = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+	/* Reduce read_sz because of the line_sz gap */
+	read_sz = NX_MIN(free_space, first_free);
+	n = 0;
+	if (read_sz > 0) {
+		/* Read in to offset cur_in + used_in */
+		n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+		used_in = used_in + n;
+		free_space = free_space - n;
+		assert(n <= read_sz);
+		if (n != read_sz) {
+			/* Either EOF or error; exit the read loop */
+			is_eof = 1;
+			goto write_state;
+		}
+	}
+
+	/* If free space wrapped around */
+	if (last_free > 0) {
+		/* Reduce read_sz because of the line_sz gap */
+		read_sz = NX_MIN(free_space, last_free);
+		n = 0;
+		if (read_sz > 0) {
+			n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+			used_in = used_in + n;       /* Increase used space */
+			free_space = free_space - n; /* Decrease free space */
+			assert(n <= read_sz);
+			if (n != read_sz) {
+				/* Either EOF or error; exit the read loop */
+				is_eof = 1;
+				goto write_state;
+			}
+		}
+	}
+
+	/* At this point we have used_in bytes in fifo_in with the
+	 * data head starting at cur_in and possibly wrapping around.
+	 */
+
+write_state:
+
+	/* Write decompressed data to output file */
+
+	NXPRT(fprintf(stderr, "write_state:\n"));
+
+	if (used_out == 0)
+		goto decomp_state;
+
+	/* If fifo_out has data waiting, write it out to the file to
+	 * make free target space for the accelerator used bytes in
+	 * the first and last parts of fifo_out.
+	 */
+
+	first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+	last_used  = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+	write_sz = first_used;
+
+	n = 0;
+	if (write_sz > 0) {
+		n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+		used_out = used_out - n;
+		/* Move head of the fifo */
+		cur_out = (cur_out + n) % fifo_out_len;
+		assert(n <= write_sz);
+		if (n != write_sz) {
+			fprintf(stderr, "error: write\n");
+			rc = -1;
+			goto err5;
+		}
+	}
+
+	if (last_used > 0) { /* If more data available in the last part */
+		write_sz = last_used; /* Keep it here for later */
+		n = 0;
+		if (write_sz > 0) {
+			n = fwrite(fifo_out, 1, write_sz, outf);
+			used_out = used_out - n;
+			cur_out = (cur_out + n) % fifo_out_len;
+			assert(n <= write_sz);
+			if (n != write_sz) {
+				fprintf(stderr, "error: write\n");
+				rc = -1;
+				goto err5;
+			}
+		}
+	}
+
+decomp_state:
+
+	/* NX decompresses input data */
+
+	NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+	if (is_final)
+		goto finish_state;
+
+	/* Address/len lists */
+	clearp_dde(ddl_in);
+	clearp_dde(ddl_out);
+
+	/* FC, CRC, HistLen, Table 6-6 */
+	if (resuming) {
+		/* Resuming a partially decompressed input.
+		 * The key to resume is supplying the 32KB
+		 * dictionary (history) to NX, which is basically
+		 * the last 32KB of output produced.
+		 */
+		fc = GZIP_FC_DECOMPRESS_RESUME;
+
+		cmdp->cpb.in_crc   = cmdp->cpb.out_crc;
+		cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+		/* Round up the history size to quadword.  Section 2.10 */
+		history_len = (history_len + 15) / 16;
+		putnn(cmdp->cpb, in_histlen, history_len);
+		history_len = history_len * 16; /* bytes */
+
+		if (history_len > 0) {
+			/* Chain in the history buffer to the DDE list */
+			if (cur_out >= history_len) {
+				nx_append_dde(ddl_in, fifo_out
+					      + (cur_out - history_len),
+					      history_len);
+			} else {
+				nx_append_dde(ddl_in, fifo_out
+					      + ((fifo_out_len + cur_out)
+					      - history_len),
+					      history_len - cur_out);
+				/* Up to 32KB history wraps around fifo_out */
+				nx_append_dde(ddl_in, fifo_out, cur_out);
+			}
+
+		}
+	} else {
+		/* First decompress job */
+		fc = GZIP_FC_DECOMPRESS;
+
+		history_len = 0;
+		/* Writing 0 clears out subc as well */
+		cmdp->cpb.in_histlen = 0;
+		total_out = 0;
+
+		put32(cmdp->cpb, in_crc, INIT_CRC);
+		put32(cmdp->cpb, in_adler, INIT_ADLER);
+		put32(cmdp->cpb, out_crc, INIT_CRC);
+		put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+		/* Assuming 10% compression ratio initially; use the
+		 * most recently measured compression ratio as a
+		 * heuristic to estimate the input and output
+		 * sizes.  If we give too much input, the target buffer
+		 * overflows and NX cycles are wasted, and then we
+		 * must retry with smaller input size.  1000 is 100%.
+		 */
+		last_comp_ratio = 100UL;
+	}
+	cmdp->crb.gzip_fc = 0;
+	putnn(cmdp->crb, gzip_fc, fc);
+
+	/*
+	 * NX source buffers
+	 */
+	first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+	last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+	if (first_used > 0)
+		nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+	if (last_used > 0)
+		nx_append_dde(ddl_in, fifo_in, last_used);
+
+	/*
+	 * NX target buffers
+	 */
+	first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+	last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+	/* Reduce output free space amount not to overwrite the history */
+	int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+				- (1<<16));
+
+	NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+		      target_max));
+
+	first_free = NX_MIN(target_max, first_free);
+	if (first_free > 0) {
+		first_offset = fifo_free_first_offset(cur_out, used_out);
+		nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+	}
+
+	if (last_free > 0) {
+		last_free = NX_MIN(target_max - first_free, last_free);
+		if (last_free > 0) {
+			last_offset = fifo_free_last_offset(cur_out, used_out,
+							    fifo_out_len);
+			nx_append_dde(ddl_out, fifo_out + last_offset,
+				      last_free);
+		}
+	}
+
+	/* Target buffer size is used to limit the source data size
+	 * based on previous measurements of compression ratio.
+	 */
+
+	/* source_sz includes history */
+	source_sz = getp32(ddl_in, ddebc);
+	assert(source_sz > history_len);
+	source_sz = source_sz - history_len;
+
+	/* Estimating how much source is needed to 3/4 fill a
+	 * target_max size target buffer.  If we overshoot, then NX
+	 * must repeat the job with smaller input and we waste
+	 * bandwidth.  If we undershoot then we use more NX calls than
+	 * necessary.
+	 */
+
+	source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+				/ 4000;
+
+	if (source_sz_estimate < source_sz) {
+		/* Target might be small, therefore limiting the
+		 * source data.
+		 */
+		source_sz = source_sz_estimate;
+		target_sz_estimate = target_max;
+	} else {
+		/* Source file might be small, therefore limiting target
+		 * touch pages to a smaller value to save processor cycles.
+		 */
+		target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+					/ (last_comp_ratio + 1);
+		target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+					    target_max);
+	}
+
+	source_sz = source_sz + history_len;
+
+	/* Some NX condition codes require submitting the NX job again.
+	 * Kernel doesn't handle NX page faults. Expects user code to
+	 * touch pages.
+	 */
+	pgfault_retries = NX_MAX_FAULTS;
+
+restart_nx:
+
+	putp32(ddl_in, ddebc, source_sz);
+
+	/* Fault in pages */
+	nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
+	nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+	nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+	/* Send job to NX */
+	cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+	switch (cc) {
+
+	case ERR_NX_TRANSLATION:
+
+		/* We touched the pages ahead of time.  In the most common case
+		 * we shouldn't be here.  But may be some pages were paged out.
+		 * Kernel should have placed the faulting address to fsaddr.
+		 */
+		NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
+			      (void *)cmdp->crb.csb.fsaddr));
+
+		if (pgfault_retries == NX_MAX_FAULTS) {
+			/* Try once with exact number of pages */
+			--pgfault_retries;
+			goto restart_nx;
+		} else if (pgfault_retries > 0) {
+			/* If still faulting try fewer input pages
+			 * assuming memory outage
+			 */
+			if (source_sz > page_sz)
+				source_sz = NX_MAX(source_sz / 2, page_sz);
+			--pgfault_retries;
+			goto restart_nx;
+		} else {
+			fprintf(stderr, "cannot make progress; too many ");
+			fprintf(stderr, "page fault retries cc= %d\n", cc);
+			rc = -1;
+			goto err5;
+		}
+
+	case ERR_NX_DATA_LENGTH:
+
+		NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+		NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+		/* Not an error in the most common case; it just says
+		 * there is trailing data that we must examine.
+		 *
+		 * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+		 * Fig.6-7 and Table 6-8.
+		 */
+		nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+		if (!csb_ce_termination(nx_ce) &&
+		    csb_ce_partial_completion(nx_ce)) {
+			/* Check CPB for more information
+			 * spbc and tpbc are valid
+			 */
+			sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+			subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+			spbc = get32(cmdp->cpb, out_spbc_decomp);
+			tpbc = get32(cmdp->crb.csb, tpbc);
+			assert(target_max >= tpbc);
+
+			goto ok_cc3; /* not an error */
+		} else {
+			/* History length error when CE(1)=1 CE(0)=0. */
+			rc = -1;
+			fprintf(stderr, "history length error cc= %d\n", cc);
+			goto err5;
+		}
+
+	case ERR_NX_TARGET_SPACE:
+
+		/* Target buffer not large enough; retry smaller input
+		 * data; give at least 1 byte.  SPBC/TPBC are not valid.
+		 */
+		assert(source_sz > history_len);
+		source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+		NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+		NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+			      source_sz, history_len));
+		goto restart_nx;
+
+	case ERR_NX_OK:
+
+		/* This should not happen for gzip formatted data;
+		 * we need trailing crc and isize
+		 */
+		fprintf(stderr, "ERR_NX_OK\n");
+		spbc = get32(cmdp->cpb, out_spbc_decomp);
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		assert(target_max >= tpbc);
+		assert(spbc >= history_len);
+		source_sz = spbc - history_len;
+		goto offsets_state;
+
+	default:
+		fprintf(stderr, "error: cc= %d\n", cc);
+		rc = -1;
+		goto err5;
+	}
+
+ok_cc3:
+
+	NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+	assert(spbc > history_len);
+	source_sz = spbc - history_len;
+
+	/* Table 6-4: Source Final Block Type (SFBT) describes the
+	 * last processed deflate block and clues the software how to
+	 * resume the next job.  SUBC indicates how many input bits NX
+	 * consumed but did not process.  SPBC indicates how many
+	 * bytes of source were given to the accelerator including
+	 * history bytes.
+	 */
+
+	switch (sfbt) {
+		int dhtlen;
+
+	case 0x0: /* Deflate final EOB received */
+
+		/* Calculating the checksum start position. */
+
+		source_sz = source_sz - subc / 8;
+		is_final = 1;
+		break;
+
+		/* Resume decompression cases are below. Basically
+		 * indicates where NX has suspended and how to resume
+		 * the input stream.
+		 */
+
+	case 0x8: /* Within a literal block; use rembytecount */
+	case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+		/* Supply the partially processed source byte again */
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* SUBC LS 3bits: number of bits in the first source byte need
+		 * to be processed.
+		 * 000 means all 8 bits;  Table 6-3
+		 * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+		 */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+						      out_rembytecnt));
+		break;
+
+	case 0xA: /* Within a FH block; */
+	case 0xB: /* Within a FH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+		break;
+
+	case 0xC: /* Within a DH block; */
+	case 0xD: /* Within a DH block; bfinal=1 */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		dhtlen = getnn(cmdp->cpb, out_dhtlen);
+		putnn(cmdp->cpb, in_dhtlen, dhtlen);
+		assert(dhtlen >= 42);
+
+		/* Round up to a qword */
+		dhtlen = (dhtlen + 127) / 128;
+
+		while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+			--dhtlen;
+			cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+		}
+		break;
+
+	case 0xE: /* Within a block header; bfinal=0; */
+		     /* Also given if source data exactly ends (SUBC=0) with
+		      * EOB code with BFINAL=0.  Means the next byte will
+		      * contain a block header.
+		      */
+	case 0xF: /* within a block header with BFINAL=1. */
+
+		source_sz = source_sz - ((subc + 7) / 8);
+
+		/* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+		cmdp->cpb.in_subc = 0;
+		cmdp->cpb.in_sfbt = 0;
+		putnn(cmdp->cpb, in_subc, subc % 8);
+		putnn(cmdp->cpb, in_sfbt, sfbt);
+
+		/* Engine did not process any data */
+		if (is_eof && (source_sz == 0))
+			is_final = 1;
+	}
+
+offsets_state:
+
+	/* Adjust the source and target buffer offsets and lengths  */
+
+	NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+	/* Delete input data from fifo_in */
+	used_in = used_in - source_sz;
+	cur_in = (cur_in + source_sz) % fifo_in_len;
+	input_file_offset = input_file_offset + source_sz;
+
+	/* Add output data to fifo_out */
+	used_out = used_out + tpbc;
+
+	assert(used_out <= fifo_out_len);
+
+	total_out = total_out + tpbc;
+
+	/* Deflate history is 32KB max.  No need to supply more
+	 * than 32KB on a resume.
+	 */
+	history_len = (total_out > window_max) ? window_max : total_out;
+
+	/* To estimate expected expansion in the next NX job; 500 means 50%.
+	 * Deflate best case is around 1 to 1000.
+	 */
+	last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+			  / ((uint64_t)tpbc + 1);
+	last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
+	NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+		      last_comp_ratio, source_sz, spbc, tpbc));
+
+	resuming = 1;
+
+finish_state:
+
+	NXPRT(fprintf(stderr, "finish_state:\n"));
+
+	if (is_final) {
+		if (used_out)
+			goto write_state; /* More data to write out */
+		else if (used_in < 8) {
+			/* Need at least 8 more bytes containing gzip crc
+			 * and isize.
+			 */
+			rc = -1;
+			goto err4;
+		} else {
+			/* Compare checksums and exit */
+			int i;
+			unsigned char tail[8];
+			uint32_t cksum, isize;
+
+			for (i = 0; i < 8; i++)
+				tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+			fprintf(stderr, "computed checksum %08x isize %08x\n",
+				cmdp->cpb.out_crc, (uint32_t) (total_out
+				% (1ULL<<32)));
+			cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+				 | (uint32_t) tail[2]<<16
+				 | (uint32_t) tail[3]<<24);
+			isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+				 | (uint32_t) tail[6]<<16
+				 | (uint32_t) tail[7]<<24);
+			fprintf(stderr, "stored   checksum %08x isize %08x\n",
+				cksum, isize);
+
+			if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+			    (total_out % (1ULL<<32))) {
+				rc = 0;	goto ok1;
+			} else {
+				rc = -1; goto err4;
+			}
+		}
+	} else
+		goto read_state;
+
+	return -1;
+
+err1:
+	fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+		expect, c);
+	return -1;
+
+err2:
+	fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+	return -1;
+
+err3:
+	fprintf(stderr, "error: gzip header\n");
+	return -1;
+
+err4:
+	fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+	fprintf(stderr, "decomp is complete: fclose\n");
+	fclose(outf);
+
+	return rc;
+}
+
+
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+	act.sa_handler = 0;
+	act.sa_sigaction = nxu_sigsegv_handler;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGSEGV, &act, NULL);
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = decompress_file(argc, argv, handle);
+
+	nx_function_end(handle);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..7496a83f9c9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for productive uses or for performance or compression
+ * ratio measurements.  For simplicity of demonstration, this sample
+ * code compresses in to fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management.  Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE	// For aligned_alloc()
+#define _DEFAULT_SOURCE	// For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+				uint32_t dstlen, int with_count,
+				struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	uint32_t fc;
+
+	assert(!!cmdp);
+
+	put32(cmdp->crb, gzip_fc, 0);  /* clear */
+	fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+			    GZIP_FC_COMPRESS_RESUME_FHT;
+	putnn(cmdp->crb, gzip_fc, fc);
+	putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+	memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+	/* Section 6.6 programming notes; spbc may be in two different
+	 * places depending on FC.
+	 */
+	if (!with_count)
+		put32(cmdp->cpb, out_spbc_comp, 0);
+	else
+		put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+	/* Figure 6-3 6-4; CSB location */
+	put64(cmdp->crb, csb_address, 0);
+	put64(cmdp->crb, csb_address,
+	      (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+	/* Source direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.source_dde);
+	putnn(cmdp->crb.source_dde, dde_count, 0);
+	put32(cmdp->crb.source_dde, ddebc, srclen);
+	put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+	/* Target direct dde (scatter-gather list) */
+	clear_dde(cmdp->crb.target_dde);
+	putnn(cmdp->crb.target_dde, dde_count, 0);
+	put32(cmdp->crb.target_dde, ddebc, dstlen);
+	put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+	/* Submit the crb, the job descriptor, to the accelerator */
+	return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Prepares a blank no filename no timestamp gzip header and returns
+ * the number of bytes written to buf.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+	int i = 0;
+
+	buf[i++] = 0x1f; /* ID1 */
+	buf[i++] = 0x8b; /* ID2 */
+	buf[i++] = 0x08; /* CM  */
+	buf[i++] = 0x00; /* FLG */
+	buf[i++] = 0x00; /* MTIME */
+	buf[i++] = 0x00; /* MTIME */
+	buf[i++] = 0x00; /* MTIME */
+	buf[i++] = 0x00; /* MTIME */
+	buf[i++] = 0x04; /* XFL 4=fastest */
+	buf[i++] = 0x03; /* OS UNIX */
+
+	return i;
+}
+
+/* Caller must free the allocated buffer return nonzero on error. */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+	struct stat statbuf;
+	FILE *fp;
+	char *p;
+	size_t num_bytes;
+
+	if (stat(fname, &statbuf)) {
+		perror(fname);
+		return(-1);
+	}
+	fp = fopen(fname, "r");
+	if (fp == NULL) {
+		perror(fname);
+		return(-1);
+	}
+	assert(NULL != (p = (char *) malloc(statbuf.st_size)));
+	num_bytes = fread(p, 1, statbuf.st_size, fp);
+	if (ferror(fp) || (num_bytes != statbuf.st_size)) {
+		perror(fname);
+		return(-1);
+	}
+	*buf = p;
+	*bufsize = num_bytes;
+	return 0;
+}
+
+/* Returns nonzero on error */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+	FILE *fp;
+	size_t num_bytes;
+
+	fp = fopen(fname, "w");
+	if (fp == NULL) {
+		perror(fname);
+		return(-1);
+	}
+	num_bytes = fwrite(buf, 1, bufsize, fp);
+	if (ferror(fp) || (num_bytes != bufsize)) {
+		perror(fname);
+		return(-1);
+	}
+	fclose(fp);
+	return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ * Returns number of appended bytes
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+	uint64_t flush;
+	int shift = (tebc & 0x7);
+
+	if (tebc > 0) {
+		/* Last byte is partially full */
+		buf = buf - 1;
+		*buf = *buf & (unsigned char) ((1<<tebc)-1);
+	} else
+		*buf = 0;
+	flush = ((0x1ULL & final) << shift) | *buf;
+	shift = shift + 3; /* BFINAL and BTYPE written */
+	shift = (shift <= 8) ? 8 : 16;
+	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+	shift = shift + 32;
+	while (shift > 0) {
+		*buf++ = (unsigned char) (flush & 0xffULL);
+		flush = flush >> 8;
+		shift = shift - 8;
+	}
+	return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Final deflate block bit.  This call assumes the block
+ * beginning is byte aligned.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+	char *b = buf;
+
+	if (bfinal)
+		*b = *b | (unsigned char) 0x01;
+	else
+		*b = *b & (unsigned char) 0xfe;
+}
+
+int compress_file(int argc, char **argv, void *handle)
+{
+	char *inbuf, *outbuf, *srcbuf, *dstbuf;
+	char outname[FNAME_MAX];
+	uint32_t srclen, dstlen;
+	uint32_t flushlen, chunk;
+	size_t inlen, outlen, dsttotlen, srctotlen;
+	uint32_t crc, spbc, tpbc, tebc;
+	int lzcounts = 0;
+	int cc;
+	int num_hdr_bytes;
+	struct nx_gzip_crb_cpb_t *cmdp;
+	uint32_t pagelen = 65536;
+	int fault_tries = NX_MAX_FAULTS;
+
+	cmdp = (void *)(uintptr_t)
+		aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+			      sizeof(struct nx_gzip_crb_cpb_t));
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+		exit(-1);
+	}
+	if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+		exit(-1);
+	fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
+
+	/* Generous output buffer for header/trailer */
+	outlen = 2 * inlen + 1024;
+
+	assert(NULL != (outbuf = (char *)malloc(outlen)));
+	nxu_touch_pages(outbuf, outlen, pagelen, 1);
+
+	/* Compress piecemeal in smallish chunks */
+	chunk = 1<<22;
+
+	/* Write the gzip header to the stream */
+	num_hdr_bytes = gzip_header_blank(outbuf);
+	dstbuf    = outbuf + num_hdr_bytes;
+	outlen    = outlen - num_hdr_bytes;
+	dsttotlen = num_hdr_bytes;
+
+	srcbuf    = inbuf;
+	srctotlen = 0;
+
+	/* Init the CRB, the coprocessor request block */
+	memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+	/* Initial gzip crc32 */
+	put32(cmdp->cpb, in_crc, 0);
+
+	while (inlen > 0) {
+
+		/* Submit chunk size source data per job */
+		srclen = NX_MIN(chunk, inlen);
+		/* Supply large target in case data expands */
+		dstlen = NX_MIN(2*srclen, outlen);
+
+		/* Page faults are handled by the user code */
+
+		/* Fault-in pages; an improved code wouldn't touch so
+		 * many pages but would try to estimate the
+		 * compression ratio and adjust both the src and dst
+		 * touch amounts.
+		 */
+		nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+				1);
+		nxu_touch_pages(srcbuf, srclen, pagelen, 0);
+		nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+		cc = compress_fht_sample(
+			srcbuf, srclen,
+			dstbuf, dstlen,
+			lzcounts, cmdp, handle);
+
+		if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+		    cc != ERR_NX_TRANSLATION) {
+			fprintf(stderr, "nx error: cc= %d\n", cc);
+			exit(-1);
+		}
+
+		/* Page faults are handled by the user code */
+		if (cc == ERR_NX_TRANSLATION) {
+			NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+			NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+				  fault_tries,
+				  (unsigned long long) cmdp->crb.csb.fsaddr));
+			fault_tries--;
+			if (fault_tries > 0) {
+				continue;
+			} else {
+				fprintf(stderr, "error: cannot progress; ");
+				fprintf(stderr, "too many faults\n");
+				exit(-1);
+			};
+		}
+
+		fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
+
+		inlen     = inlen - srclen;
+		srcbuf    = srcbuf + srclen;
+		srctotlen = srctotlen + srclen;
+
+		/* Two possible locations for spbc depending on the function
+		 * code.
+		 */
+		spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+			get32(cmdp->cpb, out_spbc_comp_with_count);
+		assert(spbc == srclen);
+
+		/* Target byte count */
+		tpbc = get32(cmdp->crb.csb, tpbc);
+		/* Target ending bit count */
+		tebc = getnn(cmdp->cpb, out_tebc);
+		NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+		NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+		if (inlen > 0) { /* More chunks to go */
+			set_bfinal(dstbuf, 0);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+			/* Round up to the next byte with a flush
+			 * block; do not set the BFINAqL bit.
+			 */
+			flushlen  = append_sync_flush(dstbuf, tebc, 0);
+			dsttotlen = dsttotlen + flushlen;
+			outlen    = outlen - flushlen;
+			dstbuf    = dstbuf + flushlen;
+			NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+					flushlen));
+		} else {  /* Done */
+			/* Set the BFINAL bit of the last block per Deflate
+			 * specification.
+			 */
+			set_bfinal(dstbuf, 1);
+			dstbuf    = dstbuf + tpbc;
+			dsttotlen = dsttotlen + tpbc;
+			outlen    = outlen - tpbc;
+		}
+
+		/* Resuming crc32 for the next chunk */
+		crc = get32(cmdp->cpb, out_crc);
+		put32(cmdp->cpb, in_crc, crc);
+		crc = be32toh(crc);
+	}
+
+	/* Append crc32 and ISIZE to the end */
+	memcpy(dstbuf, &crc, 4);
+	memcpy(dstbuf+4, &srctotlen, 4);
+	dsttotlen = dsttotlen + 8;
+	outlen    = outlen - 8;
+
+	assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+	strcpy(outname, argv[1]);
+	strcat(outname, FEXT);
+	if (write_output_file(outname, outbuf, dsttotlen)) {
+		fprintf(stderr, "write error: %s\n", outname);
+		exit(-1);
+	}
+
+	fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
+		dsttotlen);
+	fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+	if (inbuf != NULL)
+		free(inbuf);
+
+	if (outbuf != NULL)
+		free(outbuf);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int rc;
+	struct sigaction act;
+	void *handle;
+
+	nx_dbg = 0;
+	nx_gzip_log = NULL;
+	act.sa_handler = 0;
+	act.sa_sigaction = nxu_sigsegv_handler;
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	sigemptyset(&act.sa_mask);
+	sigaction(SIGSEGV, &act, NULL);
+
+	handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+	if (!handle) {
+		fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+		exit(-1);
+	}
+
+	rc = compress_file(argc, argv, handle);
+
+	nx_function_end(handle);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..c055885da40a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "vas-api.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync()    ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default()  ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low()      ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+	int fd;
+	int function;
+	void *paste_addr;
+};
+
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+	int rc, fd;
+	void *addr;
+	struct vas_tx_win_open_attr txattr;
+
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, " open device name %s\n", devname);
+		return -errno;
+	}
+
+	memset(&txattr, 0, sizeof(txattr));
+	txattr.version = 1;
+	txattr.vas_id = pri;
+	rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
+	if (rc < 0) {
+		fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
+		rc = -errno;
+		goto out;
+	}
+
+	addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+	if (addr == MAP_FAILED) {
+		fprintf(stderr, "mmap() failed, errno %d\n", errno);
+		rc = -errno;
+		goto out;
+	}
+	handle->fd = fd;
+	handle->paste_addr = (void *)((char *)addr + 0x400);
+
+	rc = 0;
+out:
+	close(fd);
+	return rc;
+}
+
+void *nx_function_begin(int function, int pri)
+{
+	int rc;
+	char *devname = "/dev/crypto/nx-gzip";
+	struct nx_handle *nxhandle;
+
+	if (function != NX_FUNC_COMP_GZIP) {
+		errno = EINVAL;
+		fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+		return NULL;
+	}
+
+
+	nxhandle = malloc(sizeof(*nxhandle));
+	if (!nxhandle) {
+		errno = ENOMEM;
+		fprintf(stderr, " No memory\n");
+		return NULL;
+	}
+
+	nxhandle->function = function;
+	rc = open_device_nodes(devname, pri, nxhandle);
+	if (rc < 0) {
+		errno = -rc;
+		fprintf(stderr, " open_device_nodes failed\n");
+		return NULL;
+	}
+
+	return nxhandle;
+}
+
+int nx_function_end(void *handle)
+{
+	int rc = 0;
+	struct nx_handle *nxhandle = handle;
+
+	rc = munmap(nxhandle->paste_addr - 0x400, 4096);
+	if (rc < 0) {
+		fprintf(stderr, "munmap() failed, errno %d\n", errno);
+		return rc;
+	}
+	close(nxhandle->fd);
+	free(nxhandle);
+
+	return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+	long poll = 0;
+	uint64_t t;
+
+	/* Save power and let other threads use the h/w. top may show
+	 * 100% but only because OS doesn't know we slowed the this
+	 * h/w thread while polling. We're letting other threads have
+	 * higher throughput on the core.
+	 */
+	cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH     300000UL
+
+	t = __ppc_get_timebase();
+
+	while (getnn(cmdp->crb.csb, csb_v) == 0) {
+		++poll;
+		hwsync();
+
+		cpu_pri_low();
+
+		/* usleep(0) takes around 29000 ticks ~60 us.
+		 * 300000 is spinning for about 600 us then
+		 * start sleeping.
+		 */
+		if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+			cpu_pri_default();
+			usleep(1);
+		}
+
+		if (poll > CSB_MAX_POLL)
+			break;
+
+		/* Fault address from signal handler */
+		if (nx_fault_storage_address) {
+			cpu_pri_default();
+			return -EAGAIN;
+		}
+
+	}
+
+	cpu_pri_default();
+
+	/* hw has updated csb and output buffer */
+	hwsync();
+
+	/* Check CSB flags. */
+	if (getnn(cmdp->crb.csb, csb_v) == 0) {
+		fprintf(stderr, "CSB still not valid after %d polls.\n",
+			(int) poll);
+		prt_err("CSB still not valid after %d polls, giving up.\n",
+			(int) poll);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int i, ret, retries;
+	struct nx_handle *nxhandle = handle;
+
+	assert(handle != NULL);
+	i = 0;
+	retries = 5000;
+	while (i++ < retries) {
+		hwsync();
+		vas_copy(&cmdp->crb, 0);
+		ret = vas_paste(nxhandle->paste_addr, 0);
+		hwsync();
+
+		NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+				i, retries, ret));
+
+		if ((ret == 2) || (ret == 3)) {
+
+			ret = nx_wait_for_csb(cmdp);
+			if (!ret) {
+				goto out;
+			} else if (ret == -EAGAIN) {
+				long x;
+
+				prt_err("Touching address %p, 0x%lx\n",
+					 nx_fault_storage_address,
+					 *(long *) nx_fault_storage_address);
+				x = *(long *) nx_fault_storage_address;
+				*(long *) nx_fault_storage_address = x;
+				nx_fault_storage_address = 0;
+				continue;
+			} else {
+				prt_err("wait_for_csb() returns %d\n", ret);
+				break;
+			}
+		} else {
+			if (i < 10) {
+				/* spin for few ticks */
+#define SPIN_TH 500UL
+				uint64_t fail_spin;
+
+				fail_spin = __ppc_get_timebase();
+				while ((__ppc_get_timebase() - fail_spin) <
+					 SPIN_TH)
+					;
+			} else {
+				/* sleep */
+				unsigned int pr = 0;
+
+				if (pr++ % 100 == 0) {
+					prt_err("Paste attempt %d/", i);
+					prt_err("%d, failed pid= %d\n", retries,
+						getpid());
+				}
+				usleep(1);
+			}
+			continue;
+		}
+	}
+
+out:
+	cpu_pri_default();
+
+	return ret;
+}
+
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+	int cc;
+
+	cc = nxu_run_job(cmdp, handle);
+
+	if (!cc)
+		cc = getnn(cmdp->crb.csb, csb_cc);      /* CC Table 6-8 */
+
+	return cc;
+}
+
+
+void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+	fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+		sig, info->si_code, info->si_addr);
+
+	nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Fault in pages prior to NX job submission.  wr=1 may be required to
+ * touch writeable pages.  System zero pages do not fault-in the page as
+ * intended.  Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ */
+int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+	char *begin = buf;
+	char *end = (char *) buf + buf_len - 1;
+	volatile char t;
+
+	assert(buf_len >= 0 && !!buf);
+
+	NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+			(buf + buf_len), buf_len, wr));
+
+	if (buf_len <= 0 || buf == NULL)
+		return -1;
+
+	do {
+		t = *begin;
+		if (wr)
+			*begin = t;
+		begin = begin + page_len;
+	} while (begin < end);
+
+	/* When buf_sz is small or buf tail is in another page */
+	t = *end;
+	if (wr)
+		*end = t;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
new file mode 100644
index 000000000000..0db2d6485037
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* From asm-compat.h */
+#define __stringify_in_c(...)	#__VA_ARGS__
+#define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a)    (((a) & 0x1f) << 16)
+#define ___PPC_RB(b)    (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY                   0x7c20060c
+#define PPC_INST_PASTE                  0x7c20070d
+
+#define PPC_COPY(a, b)          stringify_in_c(.long PPC_INST_COPY | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b)         stringify_in_c(.long PPC_INST_PASTE | \
+						___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT	28
+#define CR0_MASK	0xF
+/*
+ * Copy/paste instructions:
+ *
+ *	copy RA,RB
+ *		Copy contents of address (RA) + effective_address(RB)
+ *		to internal copy-buffer.
+ *
+ *	paste RA,RB
+ *		Paste contents of internal copy-buffer to the address
+ *		(RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+	asm volatile(PPC_COPY(%0, %1)";"
+		:
+		: "b" (offset), "b" (crb)
+		: "memory");
+
+	return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+	__u32 cr;
+
+	cr = 0;
+	asm volatile(PPC_PASTE(%1, %2)";"
+		"mfocrf %0, 0x80;"
+		: "=r" (cr)
+		: "b" (offset), "b" (paste_address)
+		: "memory", "cr0");
+
+	return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/crb.h b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
new file mode 100644
index 000000000000..ab101085fa7e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842              (0x00003ff8)
+#define CCW_FC_842              (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;
+	__be64 address;
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	__u8 flags;
+	__u8 cs;
+	__u8 cc;
+	__u8 ce;
+	__be32 count;
+	__be64 address;
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;
+	__u8 count;
+	__u8 index;
+	__be32 length;
+	__be64 address;
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;
+	__be32 flags;
+	__be64 csb_addr;
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	__u8 reserved[48];
+
+	struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+#define crb_csb_addr(c)         __be64_to_cpu(c->csb_addr)
+#define crb_nx_fault_addr(c)    __be64_to_cpu(c->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c)         c->stamp.nx.flags
+#define crb_nx_fault_status(c)  c->stamp.nx.fault_status
+#define crb_nx_pswid(c)		c->stamp.nx.pswid
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS                  (0xff000000)
+#define CCW_CT                  (0x00ff0000)
+#define CCW_CD                  (0x0000ffff)
+#define CCW_CL                  (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
new file mode 100644
index 000000000000..1abe23fc29e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define	NX_FUNC_COMP_842	1
+#define NX_FUNC_COMP_GZIP	2
+
+#ifndef __aligned
+#define __aligned(x)	__attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+	bool use_crc;
+	bool decompress;		/* true decompress; false compress */
+	bool move_data;
+	int timeout;			/* seconds */
+};
+
+struct nxbuf_t {
+	int len;
+	char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+void *nx_function_begin(int function, int pri);
+
+int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+		void *arg);
+
+int nx_function_end(void *handle);
+
+#endif	/* _NX_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
new file mode 100644
index 000000000000..16464e19c47f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;
+extern int nx_gzip_trace;
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;
+pthread_mutex_t mutex_log;
+
+#define nx_gzip_trace_enabled()       (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled()    (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled()    (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics()   (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat()     (nx_gzip_trace & 0x10)
+
+#define prt(fmt, ...) do { \
+	pthread_mutex_lock(&mutex_log);					\
+	flock(nx_gzip_log->_fileno, LOCK_EX);				\
+	time_t t; struct tm *m; time(&t); m = localtime(&t);		\
+	fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] "		\
+		"pid %d: " fmt,	\
+		(int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+		(int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec,	\
+		(int)getpid(), ## __VA_ARGS__);				\
+	fflush(nx_gzip_log);						\
+	flock(nx_gzip_log->_fileno, LOCK_UN);				\
+	pthread_mutex_unlock(&mutex_log);				\
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) {			\
+	prt("%s:%u: Error: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Use in case of an warning */
+#define prt_warn(fmt, ...) do {	if (nx_dbg >= 1) {			\
+	prt("%s:%u: Warning: "fmt,					\
+		__FILE__, __LINE__, ## __VA_ARGS__);			\
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do {	if (nx_dbg >= 2) {			\
+	prt("Info: "fmt, ## __VA_ARGS__);				\
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) {		\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do {	if (nx_gzip_gather_statistics()) {	\
+	prt("### "fmt, ## __VA_ARGS__);					\
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do {						\
+		if (nx_gzip_hw_trace_enabled())				\
+			fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do {						\
+		if (nx_gzip_sw_trace_enabled())				\
+			fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+	} while (0)
+
+
+/**
+ * str_to_num - Convert string into number and copy with endings like
+ *              KiB for kilobyte
+ *              MiB for megabyte
+ *              GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif	/* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
new file mode 100644
index 000000000000..20a4e883e0d3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
@@ -0,0 +1,650 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ   286
+#define DSZ    30
+
+/* nx */
+#define DHTSZ  18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X)	X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X)	X
+#define nx_get_time()	__ppc_get_timebase()
+#define nx_get_freq()	__ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time()  (-1)
+#define nx_get_freq()  (-1)
+#endif
+
+#define NX_MAX_FAULTS  500
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce:       completion extension
+ * cpb:      coprocessor parameter block (metadata)
+ * crb:      coprocessor request block (command)
+ * csb:      coprocessor status block (status)
+ * dht:      dynamic huffman table
+ * dde:      data descriptor element (address, length)
+ * ddl:      list of ddes
+ * dh/fh:    dynamic and fixed huffman types
+ * fc:       coprocessor function code
+ * histlen:  history/dictionary length
+ * history:  sliding window of up to 32KB of data
+ * lzcount:  Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt:     source final block type; last block's type during decomp
+ * spbc:     source processed byte count
+ * subc:     source unprocessed bit count
+ * tebc:     target ending bit count; valid bits in the last byte
+ * tpbc:     target processed byte count
+ * vas:      virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+	uint32_t word[4];
+	uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+	/* Data Descriptor Element, Section 6.4 */
+	union {
+		uint32_t dde_count;
+		/* When dde_count == 0 ddead is a pointer to a data buffer;
+		 * ddebc is the buffer length bytes.
+		 * When dde_count > 0 dde is an indirect dde; ddead is a
+		 * pointer to a contiguous list of direct ddes; ddebc is the
+		 * total length of all data pointed to by the list of direct
+		 * ddes. Note that only one level of indirection is permitted.
+		 * See Section 6.4 of the user manual for additional details.
+		 */
+	};
+	uint32_t ddebc; /* dde byte count */
+	uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+	/* Coprocessor Status Block, Section 6.6  */
+	union {
+		uint32_t csb_v;
+		/* Valid bit. v must be set to 0 by the program
+		 * before submitting the coprocessor command.
+		 * Software can poll for the v bit
+		 */
+
+		uint32_t csb_f;
+		/* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+		uint32_t csb_cs;
+		/* cs completion sequence; unused */
+
+		uint32_t csb_cc;
+		/* cc completion code; cc != 0 exception occurred */
+
+		uint32_t csb_ce;
+		/* ce completion extension */
+
+	};
+	uint32_t tpbc;
+	/* target processed byte count TPBC */
+
+	uint64_t fsaddr;
+	/* Section 6.12.1 CSB NonZero error summary.  FSA Failing storage
+	 * address.  Address where error occurred. When available, written
+	 * to A field of CSB
+	 */
+} __aligned(16);
+
+struct nx_ccb_t {
+	/* Coprocessor Completion Block, Section 6.7 */
+
+	uint32_t reserved[3];
+	union {
+		/* When crb.c==0 (no ccb defined) it is reserved;
+		 * When crb.c==1 (ccb defined) it is cm
+		 */
+
+		uint32_t ccb_cm;
+		/* Signal interrupt of crb.c==1 and cm==1 */
+
+		uint32_t word;
+		/* generic access to the 32bit word */
+	};
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+	/*
+	 * CRB operand of the paste coprocessor instruction is stamped
+	 * in quadword 4 with the information shown here as its written
+	 * in to the receive FIFO of the coprocessor
+	 */
+
+	union {
+		uint32_t vas_buf_num;
+		/* Verification only vas buffer number which correlates to
+		 * the low order bits of the atag in the paste command
+		 */
+
+		uint32_t send_wc_id;
+		/* Pointer to Send Window Context that provides for NX address
+		 * translation information, such as MSR and LPCR bits, job
+		 * completion interrupt RA, PSWID, and job utilization counter.
+		 */
+
+	};
+	union {
+		uint32_t recv_wc_id;
+		/* Pointer to Receive Window Context. NX uses this to return
+		 * credits to a Receive FIFO as entries are dequeued.
+		 */
+
+	};
+	uint32_t reserved2;
+	union {
+		uint32_t vas_invalid;
+		/* Invalid bit. If this bit is 1 the CRB is discarded by
+		 * NX upon fetching from the receive FIFO. If this bit is 0
+		 * the CRB is processed normally. The bit is stamped to 0
+		 * by VAS and may be written to 1 by hypervisor while
+		 * the CRB is in the receive FIFO (in memory).
+		 */
+
+	};
+};
+
+struct nx_stamped_fault_crb_t {
+	/*
+	 * A CRB that has a translation fault is stamped by NX in quadword 4
+	 * and pasted to the Fault Send Window in VAS.
+	 */
+	uint64_t fsa;
+	union {
+		uint32_t nxsf_t;
+		uint32_t nxsf_fs;
+	};
+	uint32_t pswid;
+};
+
+union stamped_crb_t {
+	struct vas_stamped_crb_t      vas;
+	struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+	/*
+	 * Coprocessor Parameter Block In/Out are used to pass metadata
+	 * to/from accelerator.  Tables 6.5 and 6.6 of the user manual.
+	 */
+
+	/* CPBInput */
+
+	struct {
+		union {
+		union nx_qw_t qw0;
+			struct {
+				uint32_t in_adler;            /* bits 0:31  */
+				uint32_t in_crc;              /* bits 32:63 */
+				union {
+					uint32_t in_histlen;  /* bits 64:75 */
+					uint32_t in_subc;     /* bits 93:95 */
+				};
+				union {
+					/* bits 108:111 */
+					uint32_t in_sfbt;
+					/* bits 112:127 */
+					uint32_t in_rembytecnt;
+					/* bits 116:127 */
+					uint32_t in_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  in_dht[DHTSZ];	/* qw[1:18]     */
+			char in_dht_char[DHT_MAXSZ];	/* byte access  */
+		};
+		union nx_qw_t  reserved[5];		/* qw[19:23]    */
+	};
+
+	/* CPBOutput */
+
+	volatile struct {
+		union {
+			union nx_qw_t qw24;
+			struct {
+				uint32_t out_adler;    /* bits 0:31  qw[24] */
+				uint32_t out_crc;      /* bits 32:63 qw[24] */
+				union {
+					/* bits 77:79 qw[24] */
+					uint32_t out_tebc;
+					/* bits 80:95 qw[24] */
+					uint32_t out_subc;
+				};
+				union {
+					/* bits 108:111 qw[24] */
+					uint32_t out_sfbt;
+					/* bits 112:127 qw[24] */
+					uint32_t out_rembytecnt;
+					/* bits 116:127 qw[24] */
+					uint32_t out_dhtlen;
+				};
+			};
+		};
+		union {
+			union nx_qw_t  qw25[79];        /* qw[25:103] */
+			/* qw[25] compress no lzcounts or wrap */
+			uint32_t out_spbc_comp_wrap;
+			uint32_t out_spbc_wrap;         /* qw[25] wrap */
+			/* qw[25] compress no lzcounts */
+			uint32_t out_spbc_comp;
+			 /* 286 LL and 30 D symbol counts */
+			uint32_t out_lzcount[LLSZ+DSZ];
+			struct {
+				union nx_qw_t  out_dht[DHTSZ];  /* qw[25:42] */
+				/* qw[43] decompress */
+				uint32_t out_spbc_decomp;
+			};
+		};
+		/* qw[104] compress with lzcounts */
+		uint32_t out_spbc_comp_with_count;
+	};
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t gzip_fc;     /* bits[24-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	volatile struct nx_ccb_t ccb;         /* byte[48:63] */
+	volatile union {
+		/* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+		 * in crb earlier; JReilly says csb written with partial inject
+		 */
+		union nx_qw_t reserved64[11];
+		union stamped_crb_t stamp;       /* byte[64:79] */
+	};
+	volatile struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+	struct nx_gzip_crb_t crb;
+	struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * NX hardware convention has the msb bit on the left numbered 0.
+ * The defines below has *_offset defined as the right most bit
+ * position of a field.  x of size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x)          ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers.  Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask        size_mask(8)
+#define dde_count_offset      23
+
+/* CSB */
+
+#define csb_v_mask            size_mask(1)
+#define csb_v_offset          0
+#define csb_f_mask            size_mask(1)
+#define csb_f_offset          6
+#define csb_cs_mask           size_mask(8)
+#define csb_cs_offset         15
+#define csb_cc_mask           size_mask(8)
+#define csb_cc_offset         23
+#define csb_ce_mask           size_mask(8)
+#define csb_ce_offset         31
+
+/* CCB */
+
+#define ccb_cm_mask           size_mask(3)
+#define ccb_cm_offset         31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask      size_mask(6)
+#define vas_buf_num_offset    5
+#define send_wc_id_mask       size_mask(16)
+#define send_wc_id_offset     31
+#define recv_wc_id_mask       size_mask(16)
+#define recv_wc_id_offset     31
+#define vas_invalid_mask      size_mask(1)
+#define vas_invalid_offset    31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask           size_mask(1)
+#define nxsf_t_offset         23
+#define nxsf_fs_mask          size_mask(8)
+#define nxsf_fs_offset        31
+
+/* CPB input */
+
+#define in_histlen_mask       size_mask(12)
+#define in_histlen_offset     11
+#define in_dhtlen_mask        size_mask(12)
+#define in_dhtlen_offset      31
+#define in_subc_mask          size_mask(3)
+#define in_subc_offset        31
+#define in_sfbt_mask          size_mask(4)
+#define in_sfbt_offset        15
+#define in_rembytecnt_mask    size_mask(16)
+#define in_rembytecnt_offset  31
+
+/* CPB output */
+
+#define out_tebc_mask         size_mask(3)
+#define out_tebc_offset       15
+#define out_subc_mask         size_mask(16)
+#define out_subc_offset       31
+#define out_sfbt_mask         size_mask(4)
+#define out_sfbt_offset       15
+#define out_rembytecnt_mask   size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask       size_mask(12)
+#define out_dhtlen_offset     31
+
+/* CRB */
+
+#define gzip_fc_mask          size_mask(8)
+#define gzip_fc_offset        31
+#define crb_c_mask            size_mask(1)
+#define crb_c_offset          28
+#define crb_at_mask           size_mask(1)
+#define crb_at_offset         30
+#define csb_address_mask      ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers.  Do not access registers directly
+ * because of the endian conversion.  P9 processor may run either as
+ * Little or Big endian. However the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access respective
+ * register sizes.
+ * Use nn forms for the register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG)      ((be32toh(ST.REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define getpnn(ST, REG)     ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+				 & REG##_mask)
+#define get32(ST, REG)      (be32toh(ST.REG))
+#define getp32(ST, REG)     (be32toh((ST)->REG))
+#define get64(ST, REG)      (be64toh(ST.REG))
+#define getp64(ST, REG)     (be64toh((ST)->REG))
+
+#define unget32(ST, REG)    (get32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define ungetp32(ST, REG)   (getp32(ST, REG) & ~((REG##_mask) \
+				<< (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define clear_regs(ST)      memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST)       do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
+				} while (0)
+#define clearp_dde(ST)      do { (ST)->dde_count = (ST)->ddebc = 0; \
+				 (ST)->ddead = 0; \
+				} while (0)
+#define clear_struct(ST)    memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X)   (ST.REG = htobe32(unget32(ST, REG) | (((X) \
+				 & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X)  ((ST)->REG = htobe32(ungetp32(ST, REG) \
+				| (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X)   (ST.REG = htobe32(X))
+#define putp32(ST, REG, X)  ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X)   (ST.REG = htobe64(X))
+#define putp64(ST, REG, X)  ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2).  Bits ce(3-7)
+ * unused.  Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL         0x4
+#define CSB_CE_TERMINATE       0x2
+#define CSB_CE_TPBC_VALID      0x1
+
+#define csb_ce_termination(X)         (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X)    (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X)  (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X)     (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X)          (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X)         csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X)         csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X)                ((X)&(CSB_CE_PARTIAL \
+					| CSB_CE_TERMINATE) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
+ * compress smaller than source.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT    0x4
+#define SFBT_FHT    0x5
+#define SFBT_DHT    0x6
+#define SFBT_HDR    0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK                               0x01
+#define GZIP_FC_COMPRESS_FHT                             0x00
+#define GZIP_FC_COMPRESS_DHT                             0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT                       0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT                       0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT                      0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT                      0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT                0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT                0x0e
+#define GZIP_FC_DECOMPRESS                               0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND          0x12
+#define GZIP_FC_DECOMPRESS_RESUME                        0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND   0x16
+#define GZIP_FC_WRAP                                     0x1e
+
+#define fc_is_compress(fc)  (((fc) & 0x10) == 0)
+#define fc_has_count(fc)    (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK             0
+#define ERR_NX_ALIGNMENT      1
+#define ERR_NX_OPOVERLAP      2
+#define ERR_NX_DATA_LENGTH    3
+#define ERR_NX_TRANSLATION    5
+#define ERR_NX_PROTECTION     6
+#define ERR_NX_EXTERNAL_UE7   7
+#define ERR_NX_INVALID_OP     8
+#define ERR_NX_PRIVILEGE      9
+#define ERR_NX_INTERNAL_UE   10
+#define ERR_NX_EXTERN_UE_WR  12
+#define ERR_NX_TARGET_SPACE  13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR     15
+#define ERR_NX_PROTECT_WR    16
+#define ERR_NX_SUBFUNCTION   17
+#define ERR_NX_FUNC_ABORT    18
+#define ERR_NX_BYTE_MAX      19
+#define ERR_NX_CORRUPT_CRB   20
+#define ERR_NX_INVALID_CRB   21
+#define ERR_NX_INVALID_DDE   30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW  33
+#define ERR_NX_TPBC_GT_SPBC  64
+#define ERR_NX_MISSING_CODE  66
+#define ERR_NX_INVALID_DIST  67
+#define ERR_NX_INVALID_DHT   68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER   224
+#define ERR_NX_AT_FAULT     250
+#define ERR_NX_INTR_SERVER  252
+#define ERR_NX_UE253        253
+#define ERR_NX_NO_HW        254
+#define ERR_NX_HUNG_OP      255
+#define ERR_NX_END          256
+
+/* initial values for non-resume operations */
+#define INIT_CRC   0  /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1  /* adler32(0L, Z_NULL, 0)  adler is initialized to 1 */
+
+/* prototypes */
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+
+extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
+extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x)	(x |= (unsigned char)1)
+#define clr_final_bit(x)	(x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+					} while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+struct nx_eft_crb_t {
+	union {                   /* byte[0:3]   */
+		uint32_t eft_fc;      /* bits[29-31] */
+	};
+	uint32_t reserved1;       /* byte[4:7]   */
+	union {
+		uint64_t csb_address; /* byte[8:15]  */
+		struct {
+			uint32_t reserved2;
+			union {
+				uint32_t crb_c;
+				/* c==0 no ccb defined */
+
+				uint32_t crb_at;
+				/* at==0 address type is ignored;
+				 * all addrs effective assumed.
+				 */
+
+			};
+		};
+	};
+	struct nx_dde_t source_dde;           /* byte[16:31] */
+	struct nx_dde_t target_dde;           /* byte[32:47] */
+	struct nx_ccb_t ccb;                  /* byte[48:63] */
+	union {
+		union nx_qw_t reserved64[3];     /* byte[64:96] */
+	};
+	struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK                 size_mask(3)
+#define EFT_FC_OFFSET               31
+#define EFT_FC_COMPRESS             0x0
+#define EFT_FC_COMPRESS_WITH_CRC    0x1
+#define EFT_FC_DECOMPRESS           0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC  0x3
+#define EFT_FC_BLK_DATA_MOVE        0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
new file mode 120000
index 000000000000..77fb4c7236d0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/uapi/asm/vas-api.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
new file mode 100755
index 000000000000..c7b46c5fd7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if [[ ! -w /dev/crypto/nx-gzip ]]; then
+	echo "Can't access /dev/crypto/nx-gzip, skipping"
+	echo "skip: $0"
+	exit 4
+fi
+
+set -e
+
+function cleanup
+{
+	rm -f nx-tempfile*
+}
+
+trap cleanup EXIT
+
+function test_sizes
+{
+	local n=$1
+	local fname="nx-tempfile.$n"
+
+	for size in 4K 64K 1M 64M
+	do
+		echo "Testing $size ($n) ..."
+		dd if=/dev/urandom of=$fname bs=$size count=1
+		./gzfht_test $fname
+		./gunz_test ${fname}.nx.gz
+	done
+}
+
+echo "Doing basic test of different sizes ..."
+test_sizes 0
+
+echo "Running tests in parallel ..."
+for i in {1..16}
+do
+	test_sizes $i &
+done
+
+wait
+
+echo "OK"
+
+exit 0
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
index ff7896903d7b..f69b1e2641a1 100644
--- a/tools/testing/selftests/powerpc/pmu/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -2,3 +2,4 @@
 count_instructions
 l3_bank_test
 per_event_excludes
+count_stcx_fail
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 19046db995fe..904672fb78dd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions count_stcx_fail l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 
 top_srcdir = ../../../../..
@@ -13,8 +13,12 @@ all: $(TEST_GEN_PROGS) ebb
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: CFLAGS += -m64
 $(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
-	$(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/count_stcx_fail: CFLAGS += -m64
+$(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
+
 
 $(OUTPUT)/per_event_excludes: ../utils.c
 
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
new file mode 100644
index 000000000000..7b4ac4537702
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+static void setup_event(struct event *e, u64 config, int type, char *name)
+{
+	event_init_opts(e, config, type, name);
+
+	e->attr.disabled = 1;
+	e->attr.exclude_kernel = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+			 u64 overhead, bool report)
+{
+	s64 difference, expected;
+	double percentage;
+	u64 dummy;
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* Run for 1M instructions */
+	thirty_two_instruction_loop_with_ll_sc(instructions >> 5, &dummy);
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	event_read(&events[0]);
+	event_read(&events[1]);
+	event_read(&events[2]);
+
+	expected = instructions + overhead + (events[2].result.value * 10);
+	difference = events[0].result.value - expected;
+	percentage = (double)difference / events[0].result.value * 100;
+
+	if (report) {
+		printf("-----\n");
+		event_report(&events[0]);
+		event_report(&events[1]);
+		event_report(&events[2]);
+
+		printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+		printf("Expected %llu\n", expected);
+		printf("Actual   %llu\n", events[0].result.value);
+		printf("Delta    %lld, %f%%\n", difference, percentage);
+	}
+
+	event_reset(&events[0]);
+	event_reset(&events[1]);
+	event_reset(&events[2]);
+
+	if (difference < 0)
+		difference = -difference;
+
+	/* Tolerate a difference below 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / events[0].result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+	u64 current, overhead;
+	int i;
+
+	do_count_loop(events, 0, 0, false);
+	overhead = events[0].result.value;
+
+	for (i = 0; i < 100; i++) {
+		do_count_loop(events, 0, 0, false);
+		current = events[0].result.value;
+		if (current < overhead) {
+			printf("Replacing overhead %llu with %llu\n", overhead, current);
+			overhead = current;
+		}
+	}
+
+	return overhead;
+}
+
+#define	PM_MRK_STCX_FAIL	0x03e158
+#define PM_STCX_FAIL	0x01e058
+
+static int test_body(void)
+{
+	struct event events[3];
+	u64 overhead;
+
+	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
+	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
+	setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
+
+	if (event_open(&events[0])) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[1], events[0].fd)) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[2], events[0].fd)) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	overhead = determine_overhead(events);
+	printf("Overhead of null loop: %llu instructions\n", overhead);
+
+	/* Run for 1Mi instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+	/* Run for 10Mi instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+	/* Run for 100Mi instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+	/* Run for 1Bi instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+	/* Run for 16Bi instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+	/* Run for 64Bi instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+	event_close(&events[0]);
+	event_close(&events[1]);
+
+	return 0;
+}
+
+static int count_ll_sc(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(count_ll_sc, "count_ll_sc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
index 7c0fb5d2bdb1..da2a3be5441f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/trace.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -18,7 +18,7 @@ struct trace_entry
 {
 	u8 type;
 	u8 length;
-	u8 data[0];
+	u8 data[];
 };
 
 struct trace_buffer
@@ -26,7 +26,7 @@ struct trace_buffer
 	u64  size;
 	bool overflow;
 	void *tail;
-	u8   data[0];
+	u8   data[];
 };
 
 struct trace_buffer *trace_buffer_allocate(u64 size);
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8cc9b5e2c9de..c52ba09b6fed 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -41,3 +41,38 @@ FUNC_START(thirty_two_instruction_loop)
 	subi	r3,r3,1
 	b	FUNC_NAME(thirty_two_instruction_loop)
 FUNC_END(thirty_two_instruction_loop)
+
+FUNC_START(thirty_two_instruction_loop_with_ll_sc)
+	cmpdi	r3,0
+	beqlr
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 5
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+1:	ldarx	r6,0,r4		# 10
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 15
+	addi	r5,r5,1
+	addi	r5,r5,1
+	stdcx.	r6,0,r4
+	bne-	1b
+	addi	r5,r5,1		# 20
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 25
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1
+	addi	r5,r5,1		# 30
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop_with_ll_sc)
+FUNC_END(thirty_two_instruction_loop_with_ll_sc)
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 932a032bf036..d6ae54663aed 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso
+TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
 
 CFLAGS += -maltivec
 $(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
new file mode 100644
index 000000000000..e3972264615b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test that a syscall does not get restarted twice, handled by trap_norestart()
+ *
+ * Based on Al's description, and a test for the bug fixed in this commit:
+ *
+ * commit 9a81c16b527528ad307843be5571111aa8d35a80
+ * Author: Al Viro <viro@zeniv.linux.org.uk>
+ * Date:   Mon Sep 20 21:48:57 2010 +0100
+ *
+ *  powerpc: fix double syscall restarts
+ *
+ *  Make sigreturn zero regs->trap, make do_signal() do the same on all
+ *  paths.  As it is, signal interrupting e.g. read() from fd 512 (==
+ *  ERESTARTSYS) with another signal getting unblocked when the first
+ *  handler finishes will lead to restart one insn earlier than it ought
+ *  to.  Same for multiple signals with in-kernel handlers interrupting
+ *  that sucker at the same time.  Same for multiple signals of any kind
+ *  interrupting that sucker on 64bit...
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static void SIGUSR1_handler(int sig)
+{
+	kill(getpid(), SIGUSR2);
+	/*
+	 * SIGUSR2 is blocked until the handler exits, at which point it will
+	 * be raised again and think there is a restart to be done because the
+	 * pending restarted syscall has 512 (ERESTARTSYS) in r3. The second
+	 * restart will retreat NIP another 4 bytes to fail case branch.
+	 */
+}
+
+static void SIGUSR2_handler(int sig)
+{
+}
+
+static ssize_t raw_read(int fd, void *buf, size_t count)
+{
+	register long nr asm("r0") = __NR_read;
+	register long _fd asm("r3") = fd;
+	register void *_buf asm("r4") = buf;
+	register size_t _count asm("r5") = count;
+
+	asm volatile(
+"		b	0f		\n"
+"		b	1f		\n"
+"	0:	sc	0		\n"
+"		bns	2f		\n"
+"		neg	%0,%0		\n"
+"		b	2f		\n"
+"	1:				\n"
+"		li	%0,%4		\n"
+"	2:				\n"
+		: "+r"(_fd), "+r"(nr), "+r"(_buf), "+r"(_count)
+		: "i"(-ENOANO)
+		: "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "ctr", "cr0");
+
+	if (_fd < 0) {
+		errno = -_fd;
+		_fd = -1;
+	}
+
+	return _fd;
+}
+
+#define DATA "test 123"
+#define DLEN (strlen(DATA)+1)
+
+int test_restart(void)
+{
+	int pipefd[2];
+	pid_t pid;
+	char buf[512];
+
+	if (pipe(pipefd) == -1) {
+		perror("pipe");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) { /* Child reads from pipe */
+		struct sigaction act;
+		int fd;
+
+		memset(&act, 0, sizeof(act));
+		sigaddset(&act.sa_mask, SIGUSR2);
+		act.sa_handler = SIGUSR1_handler;
+		act.sa_flags = SA_RESTART;
+		if (sigaction(SIGUSR1, &act, NULL) == -1) {
+			perror("sigaction");
+			exit(EXIT_FAILURE);
+		}
+
+		memset(&act, 0, sizeof(act));
+		act.sa_handler = SIGUSR2_handler;
+		act.sa_flags = SA_RESTART;
+		if (sigaction(SIGUSR2, &act, NULL) == -1) {
+			perror("sigaction");
+			exit(EXIT_FAILURE);
+		}
+
+		/* Let's get ERESTARTSYS into r3 */
+		while ((fd = dup(pipefd[0])) != 512) {
+			if (fd == -1) {
+				perror("dup");
+				exit(EXIT_FAILURE);
+			}
+		}
+
+		if (raw_read(fd, buf, 512) == -1) {
+			if (errno == ENOANO) {
+				fprintf(stderr, "Double restart moved restart before sc instruction.\n");
+				_exit(EXIT_FAILURE);
+			}
+			perror("read");
+			exit(EXIT_FAILURE);
+		}
+
+		if (strncmp(buf, DATA, DLEN)) {
+			fprintf(stderr, "bad test string %s\n", buf);
+			exit(EXIT_FAILURE);
+		}
+
+		return 0;
+
+	} else {
+		int wstatus;
+
+		usleep(100000);		/* Hack to get reader waiting */
+		kill(pid, SIGUSR1);
+		usleep(100000);
+		if (write(pipefd[1], DATA, DLEN) != DLEN) {
+			perror("write");
+			exit(EXIT_FAILURE);
+		}
+		close(pipefd[0]);
+		close(pipefd[1]);
+		if (wait(&wstatus) == -1) {
+			perror("wait");
+			exit(EXIT_FAILURE);
+		}
+		if (!WIFEXITED(wstatus)) {
+			fprintf(stderr, "child exited abnormally\n");
+			exit(EXIT_FAILURE);
+		}
+
+		FAIL_IF(WEXITSTATUS(wstatus) != EXIT_SUCCESS);
+
+		return 0;
+	}
+}
+
+int main(void)
+{
+	test_harness_set_timeout(10);
+	return test_harness(test_restart, "sig sys restart");
+}
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 4bca5a9327a4..bed4b5318a86 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -2,7 +2,9 @@
 /fd-001-lookup
 /fd-002-posix-eq
 /fd-003-kthread
+/proc-fsconfig-hidepid
 /proc-loadavg-001
+/proc-multiple-procfs
 /proc-pid-vm
 /proc-self-map-files-001
 /proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index a8ed0f684829..8be8a03d2973 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -19,5 +19,7 @@ TEST_GEN_PROGS += self
 TEST_GEN_PROGS += setns-dcache
 TEST_GEN_PROGS += setns-sysvipc
 TEST_GEN_PROGS += thread-self
+TEST_GEN_PROGS += proc-multiple-procfs
+TEST_GEN_PROGS += proc-fsconfig-hidepid
 
 include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-fsconfig-hidepid.c b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
new file mode 100644
index 000000000000..b9af8f537185
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+
+static inline int fsopen(const char *fsname, unsigned int flags)
+{
+	return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int fsconfig(int fd, unsigned int cmd, const char *key, const void *val, int aux)
+{
+	return syscall(__NR_fsconfig, fd, cmd, key, val, aux);
+}
+
+int main(void)
+{
+	int fsfd, ret;
+	int hidepid = 2;
+
+	assert((fsfd = fsopen("proc", 0)) != -1);
+
+	ret = fsconfig(fsfd, FSCONFIG_SET_BINARY, "hidepid", &hidepid, 0);
+	assert(ret == -1);
+	assert(errno == EINVAL);
+
+	assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0));
+	assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "invisible", 0));
+
+	assert(!close(fsfd));
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-multiple-procfs.c b/tools/testing/selftests/proc/proc-multiple-procfs.c
new file mode 100644
index 000000000000..ab912ad95dab
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-multiple-procfs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+	struct stat proc_st1, proc_st2;
+	char procbuff[] = "/tmp/proc.XXXXXX/meminfo";
+	char procdir1[] = "/tmp/proc.XXXXXX";
+	char procdir2[] = "/tmp/proc.XXXXXX";
+
+	assert(mkdtemp(procdir1) != NULL);
+	assert(mkdtemp(procdir2) != NULL);
+
+	assert(!mount("proc", procdir1, "proc", 0, "hidepid=1"));
+	assert(!mount("proc", procdir2, "proc", 0, "hidepid=2"));
+
+	snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir1);
+	assert(!stat(procbuff, &proc_st1));
+
+	snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir2);
+	assert(!stat(procbuff, &proc_st2));
+
+	umount(procdir1);
+	umount(procdir2);
+
+	assert(proc_st1.st_dev != proc_st2.st_dev);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
index 1cef54458aff..2aa9a3852a84 100755
--- a/tools/testing/selftests/pstore/pstore_tests
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -10,7 +10,7 @@
 . ./common_tests
 
 prlog -n "Checking pstore console is registered ... "
-dmesg | grep -q "console \[pstore"
+dmesg | grep -Eq "console \[(pstore|${backend})"
 show_result $?
 
 prlog -n "Checking /dev/pmsg0 exists ... "
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index c0dd10257df5..da7a9dda9490 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -269,14 +269,16 @@ int main(int argc, char *argv[])
 			       "  %d programmable periodic signals\n"
 			       "  %d pulse per second\n"
 			       "  %d programmable pins\n"
-			       "  %d cross timestamping\n",
+			       "  %d cross timestamping\n"
+			       "  %d adjust_phase\n",
 			       caps.max_adj,
 			       caps.n_alarm,
 			       caps.n_ext_ts,
 			       caps.n_per_out,
 			       caps.pps,
 			       caps.n_pins,
-			       caps.cross_timestamping);
+			       caps.cross_timestamping,
+			       caps.adjust_phase);
 		}
 	}
 
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
new file mode 100755
index 000000000000..e5cc6b2f195e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# If this was a KCSAN run, collapse the reports in the various console.log
+# files onto pairs of functions.
+#
+# Usage: kcsan-collapse.sh resultsdir
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
+then
+	exit 0
+fi
+cat $1/*/console.log |
+	grep "BUG: KCSAN: " |
+	sed -e 's/^\[[^]]*] //' |
+	sort |
+	uniq -c |
+	sort -k1nr > $1/kcsan.sum
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 9d9a41625dd9..1706cd4466b4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -41,7 +41,21 @@ else
 		title="$title ($ngpsps/s)"
 	fi
 	echo $title $stopstate $fwdprog
-	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | \
+		awk -v sum=0 '
+		{
+			for (i = 0; i <= NF; i++) {
+				sum += $i;
+				if ($i ~ /Batch:/) {
+					sum = 0;
+					i = i + 2;
+				}
+			}
+		}
+
+		END {
+			print sum
+		}'`
 	if test -z "$nclosecalls"
 	then
 		exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 0326f4a5ff9c..736f04749b90 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -70,6 +70,15 @@ do
 			fi
 		fi
 	done
+	if test -f "$rd/kcsan.sum"
+	then
+		if test -s "$rd/kcsan.sum"
+		then
+			echo KCSAN summary in $rd/kcsan.sum
+		else
+			echo Clean KCSAN run in $rd
+		fi
+	fi
 done
 EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
 ret=$?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e0352304b98b..6ff611c630d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -44,30 +44,32 @@ then
 fi
 echo ' ---' `date`: Starting build
 echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
-touch $resdir/ConfigFragment.input $resdir/ConfigFragment
-if test -r "$config_dir/CFcommon"
-then
-	echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
-	cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
-	config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
-	grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
-else
-	cp $config_template $T/Kc1
-fi
-echo " --- $config_template" >> $resdir/ConfigFragment.input
-cat $config_template >> $resdir/ConfigFragment.input
-grep '#CHECK#' $config_template >> $resdir/ConfigFragment
-if test -n "$TORTURE_KCONFIG_ARG"
-then
-	echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
-	echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
-	cat $T/cmdline >> $resdir/ConfigFragment.input
-	config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
-	# Note that "#CHECK#" is not permitted on commandline.
-else
-	cp $T/Kc1 $T/Kc2
-fi
-cat $T/Kc2 >> $resdir/ConfigFragment
+touch $resdir/ConfigFragment.input
+
+# Combine additional Kconfig options into an existing set such that
+# newer options win.  The first argument is the Kconfig source ID, the
+# second the to-be-updated file within $T, and the third and final the
+# list of additional Kconfig options.  Note that a $2.tmp file is
+# created when doing the update.
+config_override_param () {
+	if test -n "$3"
+	then
+		echo $3 | sed -e 's/^ *//' -e 's/ *$//' | tr -s " " "\012" > $T/Kconfig_args
+		echo " --- $1" >> $resdir/ConfigFragment.input
+		cat $T/Kconfig_args >> $resdir/ConfigFragment.input
+		config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
+		mv $T/$2.tmp $T/$2
+		# Note that "#CHECK#" is not permitted on commandline.
+	fi
+}
+
+echo > $T/KcList
+config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
+config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
+config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
+config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
+cp $T/KcList $resdir/ConfigFragment
 
 base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
 if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
@@ -80,7 +82,7 @@ then
 	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
 	# Arch-independent indicator
 	touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $resdir
+elif kvm-build.sh $T/KcList $resdir
 then
 	# Had to build a kernel for this test.
 	QEMU="`identify_qemu vmlinux`"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 2315e2ec12d6..c279cf9cb010 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,8 @@ TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_KASAN_ARG=""
+TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
 TORTURE_QEMU_MEM=512
 TORTURE_SHUTDOWN_GRACE=180
@@ -133,6 +135,12 @@ do
 		TORTURE_KCONFIG_ARG="$2"
 		shift
 		;;
+	--kasan)
+		TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+		;;
+	--kcsan)
+		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
+		;;
 	--kmake-arg)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
 		TORTURE_KMAKE_ARG="$2"
@@ -310,6 +318,8 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
 TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
 TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
+TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
 TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
 TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -464,6 +474,7 @@ echo
 echo
 echo " --- `date` Test summary:"
 echo Results directory: $resdir/$ds
+kcsan-collapse.sh $resdir/$ds
 kvm-recheck.sh $resdir/$ds
 ___EOF___
 
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index c3c1fb5a9e1f..f2b20db9e296 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -14,3 +14,6 @@ TINY02
 TASKS01
 TASKS02
 TASKS03
+RUDE01
+TRACE01
+TRACE02
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
new file mode 100644
index 000000000000..bafe94cbd739
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
new file mode 100644
index 000000000000..9363708c9075
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-rude
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
new file mode 100644
index 000000000000..12e7661b86f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_TRACE_RCU_READ_MB=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
new file mode 100644
index 000000000000..b69ed6673c41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_TASKS_TRACE_RCU_READ_MB=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 2debe7891aeb..7311f84a5876 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,5 +1,5 @@
 CONFIG_SMP=y
-CONFIG_NR_CPUS=100
+CONFIG_NR_CPUS=56
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
index 6ca14800d755..fc263efd1fad 100644
--- a/tools/testing/selftests/sysctl/config
+++ b/tools/testing/selftests/sysctl/config
@@ -1 +1 @@
-CONFIG_TEST_SYSCTL=y
+CONFIG_TEST_SYSCTL=m
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 6a970b127c9b..19515dcb7d04 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -39,16 +39,7 @@ ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
 ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
 ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
 ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
-
-test_modprobe()
-{
-       if [ ! -d $DIR ]; then
-               echo "$0: $DIR not present" >&2
-               echo "You must have the following enabled in your kernel:" >&2
-               cat $TEST_DIR/config >&2
-               exit $ksft_skip
-       fi
-}
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
 
 function allow_user_defaults()
 {
@@ -122,13 +113,15 @@ test_reqs()
 
 function load_req_mod()
 {
-	if [ ! -d $DIR ]; then
+	if [ ! -d $SYSCTL ]; then
 		if ! modprobe -q -n $TEST_DRIVER; then
 			echo "$0: module $TEST_DRIVER not found [SKIP]"
+			echo "You must set CONFIG_TEST_SYSCTL=m in your kernel" >&2
 			exit $ksft_skip
 		fi
 		modprobe $TEST_DRIVER
 		if [ $? -ne 0 ]; then
+			echo "$0: modprobe $TEST_DRIVER failed."
 			exit
 		fi
 	fi
@@ -752,6 +745,46 @@ sysctl_test_0006()
 	run_bitmaptest
 }
 
+sysctl_test_0007()
+{
+	TARGET="${SYSCTL}/boot_int"
+	if [ ! -f $TARGET ]; then
+		echo "Skipping test for $TARGET as it is not present ..."
+		return $ksft_skip
+	fi
+
+	if [ -d $DIR ]; then
+		echo "Boot param test only possible sysctl_test is built-in, not module:"
+		cat $TEST_DIR/config >&2
+		return $ksft_skip
+	fi
+
+	echo -n "Testing if $TARGET is set to 1 ..."
+	ORIG=$(cat "${TARGET}")
+
+	if [ x$ORIG = "x1" ]; then
+		echo "ok"
+		return 0
+	fi
+	echo "FAIL"
+	echo "Checking if /proc/cmdline contains setting of the expected parameter ..."
+	if [ ! -f /proc/cmdline ]; then
+		echo "/proc/cmdline does not exist, test inconclusive"
+		return 0
+	fi
+
+	FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline)
+	if [ $FOUND = "1" ]; then
+		echo "Kernel param found but $TARGET is not 1, TEST FAILED"
+		rc=1
+		test_rc
+	fi
+
+	echo "Skipping test, expected kernel parameter missing."
+	echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
+	return $ksft_skip
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -766,6 +799,7 @@ list_tests()
 	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
 	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
 	echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
+	echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
 }
 
 usage()
@@ -929,7 +963,6 @@ test_reqs
 allow_user_defaults
 check_production_sysctl_writes_strict
 load_req_mod
-test_modprobe
 
 trap "test_finish" EXIT
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index f8ea6f5fa8e9..72cdc3c800a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -1472,6 +1472,31 @@
         ]
     },
     {
+        "id": "94bb",
+        "name": "Add pedit action with LAYERED_OP ip6 traffic_class",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 traffic_class set 0x40 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "ipv6\\+0: val 04000000 mask f00fffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
+    {
         "id": "6f5e",
         "name": "Add pedit action with LAYERED_OP ip6 flow_lbl",
         "category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 8877f7b2b809..bb543bf69d69 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -32,7 +32,7 @@
         "setup": [
             "$TC qdisc add dev $DEV2 ingress"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok",
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
         "expExitCode": "0",
         "verifyCmd": "$TC filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
@@ -77,9 +77,9 @@
         },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
-            "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+            "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
         "expExitCode": "2",
         "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -87,5 +87,43 @@
         "teardown": [
             "$TC qdisc del dev $DEV2 ingress"
         ]
+    },
+    {
+        "id": "7c65",
+        "name": "Add flower filter and then terse dump it",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "matchPattern": "filter protocol ip pref 1 flower.*handle",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
+    },
+    {
+        "id": "d45e",
+        "name": "Add flower filter and verify that terse dump doesn't output filter key",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "matchPattern": "  dst_mac e4:11:22:11:4a:51",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
new file mode 100644
index 000000000000..1cda2e11b3ad
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -0,0 +1,21 @@
+[
+    {
+        "id": "83be",
+        "name": "Create FQ-PIE with invalid number of flows",
+        "category": [
+            "qdisc",
+            "fq_pie"
+        ],
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 6a2bd2cf528e..995f66ce43eb 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -72,21 +72,21 @@ mac_prefix = args.mac_prefix
 
 def format_add_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter add dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter add dev {} {} protocol ip ingress handle {} "
             " flower {} src_mac {} dst_mac {} action drop {}".format(
                 device, prio, handle, skip, src_mac, dst_mac, share_action))
 
 
 def format_rep_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter replace dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter replace dev {} {} protocol ip ingress handle {} "
             " flower {} src_mac {} dst_mac {} action drop {}".format(
                 device, prio, handle, skip, src_mac, dst_mac, share_action))
 
 
 def format_del_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter del dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter del dev {} {} protocol ip ingress handle {} "
             "flower".format(device, prio, handle))
 
 
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
index 8e7b7c72ef65..72d41b955fb2 100644
--- a/tools/testing/selftests/timens/clock_nanosleep.c
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
 
 	ksft_set_plan(4);
 
-	check_config_posix_timers();
+	check_supported_timers();
 
 	if (unshare_timens())
 		return 1;
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 098be7c83be3..52b6a1185f52 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -155,7 +155,7 @@ int main(int argc, char *argv[])
 
 	nscheck();
 
-	check_config_posix_timers();
+	check_supported_timers();
 
 	ksft_set_plan(ARRAY_SIZE(clocks) * 2);
 
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
index e09e7e39bc52..d4fc52d47146 100644
--- a/tools/testing/selftests/timens/timens.h
+++ b/tools/testing/selftests/timens/timens.h
@@ -14,15 +14,26 @@
 #endif
 
 static int config_posix_timers = true;
+static int config_alarm_timers = true;
 
-static inline void check_config_posix_timers(void)
+static inline void check_supported_timers(void)
 {
+	struct timespec ts;
+
 	if (timer_create(-1, 0, 0) == -1 && errno == ENOSYS)
 		config_posix_timers = false;
+
+	if (clock_gettime(CLOCK_BOOTTIME_ALARM, &ts) == -1 && errno == EINVAL)
+		config_alarm_timers = false;
 }
 
 static inline bool check_skip(int clockid)
 {
+	if (!config_alarm_timers && clockid == CLOCK_BOOTTIME_ALARM) {
+		ksft_test_result_skip("CLOCK_BOOTTIME_ALARM isn't supported\n");
+		return true;
+	}
+
 	if (config_posix_timers)
 		return false;
 
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 96dba11ebe44..5e7f0051bd7b 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -22,6 +22,9 @@ int run_test(int clockid, struct timespec now)
 	timer_t fd;
 	int i;
 
+	if (check_skip(clockid))
+		return 0;
+
 	for (i = 0; i < 2; i++) {
 		struct sigevent sevp = {.sigev_notify = SIGEV_NONE};
 		int flags = 0;
@@ -74,6 +77,8 @@ int main(int argc, char *argv[])
 
 	nscheck();
 
+	check_supported_timers();
+
 	ksft_set_plan(3);
 
 	clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index eff1ec5ff215..9edd43d6b2c1 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -28,6 +28,9 @@ int run_test(int clockid, struct timespec now)
 	long long elapsed;
 	int fd, i;
 
+	if (check_skip(clockid))
+		return 0;
+
 	if (tclock_gettime(clockid, &now))
 		return pr_perror("clock_gettime(%d)", clockid);
 
@@ -81,6 +84,8 @@ int main(int argc, char *argv[])
 
 	nscheck();
 
+	check_supported_timers();
+
 	ksft_set_plan(3);
 
 	clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 8155c2ea7ccb..663062701d5a 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -1,6 +1,11 @@
 #!/bin/bash
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+[ -f /dev/tpm0 ] || exit $ksft_skip
+
 python -m unittest -v tpm2_tests.SmokeTest
 python -m unittest -v tpm2_tests.AsyncTest
 
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index a6f5e346635e..36c9d030a1c6 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -1,4 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+[ -f /dev/tpmrm0 ] || exit $ksft_skip
+
 python -m unittest -v tpm2_tests.SpaceTest
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 382cfb39a1a3..5eb64d41e541 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,3 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 vdso_test
+vdso_test_gettimeofday
+vdso_test_getcpu
 vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 9e03d61f52fd..0069f2f83f86 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
 ifeq ($(ARCH),x86)
 TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
 endif
@@ -17,7 +17,8 @@ LDLIBS += -lgcc_s
 endif
 
 all: $(TEST_GEN_PROGS)
-$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
+$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
 $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
 	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
 		vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 1dbb4b87268f..413f75620a35 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -21,29 +21,7 @@
 #include <limits.h>
 #include <elf.h>
 
-/*
- * To use this vDSO parser, first call one of the vdso_init_* functions.
- * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
- * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
- * Then call vdso_sym for each symbol you want.  For example, to look up
- * gettimeofday on x86_64, use:
- *
- *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
- * or
- *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
- *
- * vdso_sym will return 0 if the symbol doesn't exist or if the init function
- * failed or was not called.  vdso_sym is a little slow, so its return value
- * should be cached.
- *
- * vdso_sym is threadsafe; the init functions are not.
- *
- * These are the prototypes:
- */
-extern void vdso_init_from_auxv(void *auxv);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void *vdso_sym(const char *version, const char *name);
-
+#include "parse_vdso.h"
 
 /* And here's the code. */
 #ifndef ELF_BITS
diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h
new file mode 100644
index 000000000000..de0453067d7c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef PARSE_VDSO_H
+#define PARSE_VDSO_H
+
+#include <stdint.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want.  For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called.  vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+void *vdso_sym(const char *version, const char *name);
+void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+void vdso_init_from_auxv(void *auxv);
+
+#endif
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 5ac4b00acfbc..8a44ff973ee1 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -16,9 +16,7 @@
 #include <unistd.h>
 #include <stdint.h>
 
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
 
 /* We need a libc functions... */
 int strcmp(const char *a, const char *b)
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
new file mode 100644
index 000000000000..fc25ede131b8
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vdso_test_getcpu.c: Sample code to test parse_vdso.c and vDSO getcpu()
+ *
+ * Copyright (c) 2020 Arm Ltd
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+#include "parse_vdso.h"
+
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_getcpu";
+
+struct getcpu_cache;
+typedef long (*getcpu_t)(unsigned int *, unsigned int *,
+			 struct getcpu_cache *);
+
+int main(int argc, char **argv)
+{
+	unsigned long sysinfo_ehdr;
+	unsigned int cpu, node;
+	getcpu_t get_cpu;
+	long ret;
+
+	sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return KSFT_SKIP;
+	}
+
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+	get_cpu = (getcpu_t)vdso_sym(version, name);
+	if (!get_cpu) {
+		printf("Could not find %s\n", name);
+		return KSFT_SKIP;
+	}
+
+	ret = get_cpu(&cpu, &node, 0);
+	if (ret == 0) {
+		printf("Running on CPU %u node %u\n", cpu, node);
+	} else {
+		printf("%s failed\n", name);
+		return KSFT_FAIL;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index 719d5a6bd664..8ccc73ed8240 100644
--- a/tools/testing/selftests/vDSO/vdso_test.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -1,10 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * vdso_test.c: Sample code to test parse_vdso.c
+ * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
+ *                           vDSO gettimeofday()
  * Copyright (c) 2014 Andy Lutomirski
  *
  * Compile with:
- * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
  *
  * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
  */
@@ -16,10 +17,7 @@
 #include <sys/time.h>
 
 #include "../kselftest.h"
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
 
 /*
  * ARM64's vDSO exports its gettimeofday() implementation with a different
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 0edb6d900e8d..849e8226395a 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,13 +1,16 @@
 # SPDX-License-Identifier: GPL-2.0-only
 hugepage-mmap
 hugepage-shm
+khugepaged
 map_hugetlb
 map_populate
 thuge-gen
 compaction_test
 mlock2-tests
+mremap_dontunmap
 on-fault-limit
 transhuge-stress
+protection_keys
 userfaultfd
 mlock-intersect-test
 mlock-random-test
@@ -16,3 +19,4 @@ gup_benchmark
 va_128TBswitch
 map_fixed_noreplace
 write_to_hugetlbfs
+hmm-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 6998877f707e..a9026706d597 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -7,6 +7,7 @@ CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
 TEST_GEN_FILES += map_hugetlb
@@ -19,6 +20,31 @@ TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += khugepaged
+
+ifeq ($(ARCH),x86_64)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+
+TARGETS := protection_keys
+BINARIES_32 := $(TARGETS:%=%_32)
+BINARIES_64 := $(TARGETS:%=%_64)
+
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+ifeq ($(CAN_BUILD_I386),1)
+TEST_GEN_FILES += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+TEST_GEN_FILES += $(BINARIES_64)
+endif
+else
+TEST_GEN_FILES += protection_keys
+endif
 
 ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
@@ -33,6 +59,57 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+
+ifeq ($(ARCH),x86_64)
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+$(BINARIES_32): CFLAGS += -m32
+$(BINARIES_32): LDLIBS += -lrt -ldl -lm
+$(BINARIES_32): %_32: %.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+$(BINARIES_64): CFLAGS += -m64
+$(BINARIES_64): LDLIBS += -lrt -ldl
+$(BINARIES_64): %_64: %.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+	@echo "Warning: you seem to have a broken 32-bit build" 2>&1;		\
+	echo  "environment. This will reduce test coverage of 64-bit" 2>&1;	\
+	echo  "kernels. If you are using a Debian-like distribution," 2>&1;	\
+	echo  "try:"; 2>&1;							\
+	echo  "";								\
+	echo  "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386";	\
+	echo  "";								\
+	echo  "If you are using a Fedora-like distribution, try:";		\
+	echo  "";								\
+	echo  "  yum install glibc-devel.*i686";				\
+	exit 0;
+endif
+endif
+
 $(OUTPUT)/userfaultfd: LDLIBS += -lpthread
 
 $(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 93b90a9b1eeb..3ba674b64fa9 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -1,3 +1,5 @@
 CONFIG_SYSVIPC=y
 CONFIG_USERFAULTFD=y
 CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
new file mode 100644
index 000000000000..79db22604019
--- /dev/null
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HMM stands for Heterogeneous Memory Management, it is a helper layer inside
+ * the linux kernel to help device drivers mirror a process address space in
+ * the device. This allows the device to use the same address space which
+ * makes communication and data exchange a lot easier.
+ *
+ * This framework's sole purpose is to exercise various code paths inside
+ * the kernel to make sure that HMM performs as expected and to flush out any
+ * bugs.
+ */
+
+#include "../kselftest_harness.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <strings.h>
+#include <time.h>
+#include <pthread.h>
+#include <hugetlbfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+/*
+ * This is a private UAPI to the kernel test module so it isn't exported
+ * in the usual include/uapi/... directory.
+ */
+#include "../../../../lib/test_hmm_uapi.h"
+
+struct hmm_buffer {
+	void		*ptr;
+	void		*mirror;
+	unsigned long	size;
+	int		fd;
+	uint64_t	cpages;
+	uint64_t	faults;
+};
+
+#define TWOMEG		(1 << 21)
+#define HMM_BUFFER_SIZE (1024 << 12)
+#define HMM_PATH_MAX    64
+#define NTIMES		256
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(hmm)
+{
+	int		fd;
+	unsigned int	page_size;
+	unsigned int	page_shift;
+};
+
+FIXTURE(hmm2)
+{
+	int		fd0;
+	int		fd1;
+	unsigned int	page_size;
+	unsigned int	page_shift;
+};
+
+static int hmm_open(int unit)
+{
+	char pathname[HMM_PATH_MAX];
+	int fd;
+
+	snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
+	fd = open(pathname, O_RDWR, 0);
+	if (fd < 0)
+		fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
+			pathname);
+	return fd;
+}
+
+FIXTURE_SETUP(hmm)
+{
+	self->page_size = sysconf(_SC_PAGE_SIZE);
+	self->page_shift = ffs(self->page_size) - 1;
+
+	self->fd = hmm_open(0);
+	ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_SETUP(hmm2)
+{
+	self->page_size = sysconf(_SC_PAGE_SIZE);
+	self->page_shift = ffs(self->page_size) - 1;
+
+	self->fd0 = hmm_open(0);
+	ASSERT_GE(self->fd0, 0);
+	self->fd1 = hmm_open(1);
+	ASSERT_GE(self->fd1, 0);
+}
+
+FIXTURE_TEARDOWN(hmm)
+{
+	int ret = close(self->fd);
+
+	ASSERT_EQ(ret, 0);
+	self->fd = -1;
+}
+
+FIXTURE_TEARDOWN(hmm2)
+{
+	int ret = close(self->fd0);
+
+	ASSERT_EQ(ret, 0);
+	self->fd0 = -1;
+
+	ret = close(self->fd1);
+	ASSERT_EQ(ret, 0);
+	self->fd1 = -1;
+}
+
+static int hmm_dmirror_cmd(int fd,
+			   unsigned long request,
+			   struct hmm_buffer *buffer,
+			   unsigned long npages)
+{
+	struct hmm_dmirror_cmd cmd;
+	int ret;
+
+	/* Simulate a device reading system memory. */
+	cmd.addr = (__u64)buffer->ptr;
+	cmd.ptr = (__u64)buffer->mirror;
+	cmd.npages = npages;
+
+	for (;;) {
+		ret = ioctl(fd, request, &cmd);
+		if (ret == 0)
+			break;
+		if (errno == EINTR)
+			continue;
+		return -errno;
+	}
+	buffer->cpages = cmd.cpages;
+	buffer->faults = cmd.faults;
+
+	return 0;
+}
+
+static void hmm_buffer_free(struct hmm_buffer *buffer)
+{
+	if (buffer == NULL)
+		return;
+
+	if (buffer->ptr)
+		munmap(buffer->ptr, buffer->size);
+	free(buffer->mirror);
+	free(buffer);
+}
+
+/*
+ * Create a temporary file that will be deleted on close.
+ */
+static int hmm_create_file(unsigned long size)
+{
+	char path[HMM_PATH_MAX];
+	int fd;
+
+	strcpy(path, "/tmp");
+	fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
+	if (fd >= 0) {
+		int r;
+
+		do {
+			r = ftruncate(fd, size);
+		} while (r == -1 && errno == EINTR);
+		if (!r)
+			return fd;
+		close(fd);
+	}
+	return -1;
+}
+
+/*
+ * Return a random unsigned number.
+ */
+static unsigned int hmm_random(void)
+{
+	static int fd = -1;
+	unsigned int r;
+
+	if (fd < 0) {
+		fd = open("/dev/urandom", O_RDONLY);
+		if (fd < 0) {
+			fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
+					__FILE__, __LINE__);
+			return ~0U;
+		}
+	}
+	read(fd, &r, sizeof(r));
+	return r;
+}
+
+static void hmm_nanosleep(unsigned int n)
+{
+	struct timespec t;
+
+	t.tv_sec = 0;
+	t.tv_nsec = n;
+	nanosleep(&t, NULL);
+}
+
+/*
+ * Simple NULL test of device open/close.
+ */
+TEST_F(hmm, open_close)
+{
+}
+
+/*
+ * Read private anonymous memory.
+ */
+TEST_F(hmm, anon_read)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+	int val;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/*
+	 * Initialize buffer in system memory but leave the first two pages
+	 * zero (pte_none and pfn_zero).
+	 */
+	i = 2 * self->page_size / sizeof(*ptr);
+	for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Set buffer permission to read-only. */
+	ret = mprotect(buffer->ptr, size, PROT_READ);
+	ASSERT_EQ(ret, 0);
+
+	/* Populate the CPU page table with a special zero page. */
+	val = *(int *)(buffer->ptr + self->page_size);
+	ASSERT_EQ(val, 0);
+
+	/* Simulate a device reading system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device read. */
+	ptr = buffer->mirror;
+	for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], 0);
+	for (; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Read private anonymous memory which has been protected with
+ * mprotect() PROT_NONE.
+ */
+TEST_F(hmm, anon_read_prot)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Initialize mirror buffer so we can verify it isn't written. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = -i;
+
+	/* Protect buffer from reading. */
+	ret = mprotect(buffer->ptr, size, PROT_NONE);
+	ASSERT_EQ(ret, 0);
+
+	/* Simulate a device reading system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+	ASSERT_EQ(ret, -EFAULT);
+
+	/* Allow CPU to read the buffer so we can check it. */
+	ret = mprotect(buffer->ptr, size, PROT_READ);
+	ASSERT_EQ(ret, 0);
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], -i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory.
+ */
+TEST_F(hmm, anon_write)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory which has been protected with
+ * mprotect() PROT_READ.
+ */
+TEST_F(hmm, anon_write_prot)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Simulate a device reading a zero page of memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, 1);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, -EPERM);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], 0);
+
+	/* Now allow writing and see that the zero page is replaced. */
+	ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
+	ASSERT_EQ(ret, 0);
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that a device writing an anonymous private mapping
+ * will copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	pid_t pid;
+	int child_fd;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer->ptr so we can tell if it is written. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = -i;
+
+	pid = fork();
+	if (pid == -1)
+		ASSERT_EQ(pid, 0);
+	if (pid != 0) {
+		waitpid(pid, &ret, 0);
+		ASSERT_EQ(WIFEXITED(ret), 1);
+
+		/* Check that the parent's buffer did not change. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ASSERT_EQ(ptr[i], i);
+		return;
+	}
+
+	/* Check that we see the parent's values. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], -i);
+
+	/* The child process needs its own mirror to its own mm. */
+	child_fd = hmm_open(0);
+	ASSERT_GE(child_fd, 0);
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], -i);
+
+	close(child_fd);
+	exit(0);
+}
+
+/*
+ * Check that a device writing an anonymous shared mapping
+ * will not copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child_shared)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	pid_t pid;
+	int child_fd;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer->ptr so we can tell if it is written. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = -i;
+
+	pid = fork();
+	if (pid == -1)
+		ASSERT_EQ(pid, 0);
+	if (pid != 0) {
+		waitpid(pid, &ret, 0);
+		ASSERT_EQ(WIFEXITED(ret), 1);
+
+		/* Check that the parent's buffer did change. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ASSERT_EQ(ptr[i], -i);
+		return;
+	}
+
+	/* Check that we see the parent's values. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], -i);
+
+	/* The child process needs its own mirror to its own mm. */
+	child_fd = hmm_open(0);
+	ASSERT_GE(child_fd, 0);
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], -i);
+
+	close(child_fd);
+	exit(0);
+}
+
+/*
+ * Write private anonymous huge page.
+ */
+TEST_F(hmm, anon_write_huge)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	void *old_ptr;
+	void *map;
+	int *ptr;
+	int ret;
+
+	size = 2 * TWOMEG;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	size = TWOMEG;
+	npages = size >> self->page_shift;
+	map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+	ret = madvise(map, size, MADV_HUGEPAGE);
+	ASSERT_EQ(ret, 0);
+	old_ptr = buffer->ptr;
+	buffer->ptr = map;
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	buffer->ptr = old_ptr;
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Write huge TLBFS page.
+ */
+TEST_F(hmm, anon_write_hugetlbfs)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+	long pagesizes[4];
+	int n, idx;
+
+	/* Skip test if we can't allocate a hugetlbfs page. */
+
+	n = gethugepagesizes(pagesizes, 4);
+	if (n <= 0)
+		return;
+	for (idx = 0; --n > 0; ) {
+		if (pagesizes[n] < pagesizes[idx])
+			idx = n;
+	}
+	size = ALIGN(TWOMEG, pagesizes[idx]);
+	npages = size >> self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+	if (buffer->ptr == NULL) {
+		free(buffer);
+		return;
+	}
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	free_hugepage_region(buffer->ptr);
+	buffer->ptr = NULL;
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Read mmap'ed file memory.
+ */
+TEST_F(hmm, file_read)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+	int fd;
+	ssize_t len;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	fd = hmm_create_file(size);
+	ASSERT_GE(fd, 0);
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = fd;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	/* Write initial contents of the file. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+	len = pwrite(fd, buffer->mirror, size, 0);
+	ASSERT_EQ(len, size);
+	memset(buffer->mirror, 0, size);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ,
+			   MAP_SHARED,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Simulate a device reading system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Write mmap'ed file memory.
+ */
+TEST_F(hmm, file_write)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+	int fd;
+	ssize_t len;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	fd = hmm_create_file(size);
+	ASSERT_GE(fd, 0);
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = fd;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize data that the device will write to buffer->ptr. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Simulate a device writing system memory. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device wrote. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	/* Check that the device also wrote the file. */
+	len = pread(fd, buffer->mirror, size, 0);
+	ASSERT_EQ(len, size);
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory.
+ */
+TEST_F(hmm, migrate)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Migrate memory to device. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory.
+ */
+TEST_F(hmm, migrate_fault)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Migrate memory to device. */
+	ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	/* Fault pages back to system memory and check them. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Try to migrate various memory types to device private memory.
+ */
+TEST_F(hmm2, migrate_mixed)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	int *ptr;
+	unsigned char *p;
+	int ret;
+	int val;
+
+	npages = 6;
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(size);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	/* Reserve a range of addresses. */
+	buffer->ptr = mmap(NULL, size,
+			   PROT_NONE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+	p = buffer->ptr;
+
+	/* Migrating a protected area should be an error. */
+	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+	ASSERT_EQ(ret, -EINVAL);
+
+	/* Punch a hole after the first page address. */
+	ret = munmap(buffer->ptr + self->page_size, self->page_size);
+	ASSERT_EQ(ret, 0);
+
+	/* We expect an error if the vma doesn't cover the range. */
+	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+	ASSERT_EQ(ret, -EINVAL);
+
+	/* Page 2 will be a read-only zero page. */
+	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+				PROT_READ);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 2 * self->page_size);
+	val = *ptr + 3;
+	ASSERT_EQ(val, 3);
+
+	/* Page 3 will be read-only. */
+	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+				PROT_READ | PROT_WRITE);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 3 * self->page_size);
+	*ptr = val;
+	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+				PROT_READ);
+	ASSERT_EQ(ret, 0);
+
+	/* Page 4-5 will be read-write. */
+	ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
+				PROT_READ | PROT_WRITE);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 4 * self->page_size);
+	*ptr = val;
+	ptr = (int *)(buffer->ptr + 5 * self->page_size);
+	*ptr = val;
+
+	/* Now try to migrate pages 2-5 to device 1. */
+	buffer->ptr = p + 2 * self->page_size;
+	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, 4);
+
+	/* Page 5 won't be migrated to device 0 because it's on device 1. */
+	buffer->ptr = p + 5 * self->page_size;
+	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+	ASSERT_EQ(ret, -ENOENT);
+	buffer->ptr = p;
+
+	buffer->ptr = p;
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory multiple times.
+ */
+TEST_F(hmm, migrate_multiple)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	unsigned long c;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	for (c = 0; c < NTIMES; c++) {
+		buffer = malloc(sizeof(*buffer));
+		ASSERT_NE(buffer, NULL);
+
+		buffer->fd = -1;
+		buffer->size = size;
+		buffer->mirror = malloc(size);
+		ASSERT_NE(buffer->mirror, NULL);
+
+		buffer->ptr = mmap(NULL, size,
+				   PROT_READ | PROT_WRITE,
+				   MAP_PRIVATE | MAP_ANONYMOUS,
+				   buffer->fd, 0);
+		ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+		/* Initialize buffer in system memory. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ptr[i] = i;
+
+		/* Migrate memory to device. */
+		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
+				      npages);
+		ASSERT_EQ(ret, 0);
+		ASSERT_EQ(buffer->cpages, npages);
+
+		/* Check what the device read. */
+		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+			ASSERT_EQ(ptr[i], i);
+
+		/* Fault pages back to system memory and check them. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ASSERT_EQ(ptr[i], i);
+
+		hmm_buffer_free(buffer);
+	}
+}
+
+/*
+ * Read anonymous memory multiple times.
+ */
+TEST_F(hmm, anon_read_multiple)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	unsigned long c;
+	int *ptr;
+	int ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	for (c = 0; c < NTIMES; c++) {
+		buffer = malloc(sizeof(*buffer));
+		ASSERT_NE(buffer, NULL);
+
+		buffer->fd = -1;
+		buffer->size = size;
+		buffer->mirror = malloc(size);
+		ASSERT_NE(buffer->mirror, NULL);
+
+		buffer->ptr = mmap(NULL, size,
+				   PROT_READ | PROT_WRITE,
+				   MAP_PRIVATE | MAP_ANONYMOUS,
+				   buffer->fd, 0);
+		ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+		/* Initialize buffer in system memory. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ptr[i] = i + c;
+
+		/* Simulate a device reading system memory. */
+		ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+				      npages);
+		ASSERT_EQ(ret, 0);
+		ASSERT_EQ(buffer->cpages, npages);
+		ASSERT_EQ(buffer->faults, 1);
+
+		/* Check what the device read. */
+		for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+			ASSERT_EQ(ptr[i], i + c);
+
+		hmm_buffer_free(buffer);
+	}
+}
+
+void *unmap_buffer(void *p)
+{
+	struct hmm_buffer *buffer = p;
+
+	/* Delay for a bit and then unmap buffer while it is being read. */
+	hmm_nanosleep(hmm_random() % 32000);
+	munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
+	buffer->ptr = NULL;
+
+	return NULL;
+}
+
+/*
+ * Try reading anonymous memory while it is being unmapped.
+ */
+TEST_F(hmm, anon_teardown)
+{
+	unsigned long npages;
+	unsigned long size;
+	unsigned long c;
+	void *ret;
+
+	npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+	ASSERT_NE(npages, 0);
+	size = npages << self->page_shift;
+
+	for (c = 0; c < NTIMES; ++c) {
+		pthread_t thread;
+		struct hmm_buffer *buffer;
+		unsigned long i;
+		int *ptr;
+		int rc;
+
+		buffer = malloc(sizeof(*buffer));
+		ASSERT_NE(buffer, NULL);
+
+		buffer->fd = -1;
+		buffer->size = size;
+		buffer->mirror = malloc(size);
+		ASSERT_NE(buffer->mirror, NULL);
+
+		buffer->ptr = mmap(NULL, size,
+				   PROT_READ | PROT_WRITE,
+				   MAP_PRIVATE | MAP_ANONYMOUS,
+				   buffer->fd, 0);
+		ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+		/* Initialize buffer in system memory. */
+		for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+			ptr[i] = i + c;
+
+		rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
+		ASSERT_EQ(rc, 0);
+
+		/* Simulate a device reading system memory. */
+		rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+				     npages);
+		if (rc == 0) {
+			ASSERT_EQ(buffer->cpages, npages);
+			ASSERT_EQ(buffer->faults, 1);
+
+			/* Check what the device read. */
+			for (i = 0, ptr = buffer->mirror;
+			     i < size / sizeof(*ptr);
+			     ++i)
+				ASSERT_EQ(ptr[i], i + c);
+		}
+
+		pthread_join(thread, &ret);
+		hmm_buffer_free(buffer);
+	}
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm2, snapshot)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	int *ptr;
+	unsigned char *p;
+	unsigned char *m;
+	int ret;
+	int val;
+
+	npages = 7;
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(npages);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	/* Reserve a range of addresses. */
+	buffer->ptr = mmap(NULL, size,
+			   PROT_NONE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+	p = buffer->ptr;
+
+	/* Punch a hole after the first page address. */
+	ret = munmap(buffer->ptr + self->page_size, self->page_size);
+	ASSERT_EQ(ret, 0);
+
+	/* Page 2 will be read-only zero page. */
+	ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+				PROT_READ);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 2 * self->page_size);
+	val = *ptr + 3;
+	ASSERT_EQ(val, 3);
+
+	/* Page 3 will be read-only. */
+	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+				PROT_READ | PROT_WRITE);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 3 * self->page_size);
+	*ptr = val;
+	ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+				PROT_READ);
+	ASSERT_EQ(ret, 0);
+
+	/* Page 4-6 will be read-write. */
+	ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
+				PROT_READ | PROT_WRITE);
+	ASSERT_EQ(ret, 0);
+	ptr = (int *)(buffer->ptr + 4 * self->page_size);
+	*ptr = val;
+
+	/* Page 5 will be migrated to device 0. */
+	buffer->ptr = p + 5 * self->page_size;
+	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, 1);
+
+	/* Page 6 will be migrated to device 1. */
+	buffer->ptr = p + 6 * self->page_size;
+	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, 1);
+
+	/* Simulate a device snapshotting CPU pagetables. */
+	buffer->ptr = p;
+	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+
+	/* Check what the device saw. */
+	m = buffer->mirror;
+	ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
+	ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
+	ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
+	ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
+	ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
+	ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+			HMM_DMIRROR_PROT_WRITE);
+	ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+
+	hmm_buffer_free(buffer);
+}
+
+/*
+ * Test two devices reading the same memory (double mapped).
+ */
+TEST_F(hmm2, double_map)
+{
+	struct hmm_buffer *buffer;
+	unsigned long npages;
+	unsigned long size;
+	unsigned long i;
+	int *ptr;
+	int ret;
+
+	npages = 6;
+	size = npages << self->page_shift;
+
+	buffer = malloc(sizeof(*buffer));
+	ASSERT_NE(buffer, NULL);
+
+	buffer->fd = -1;
+	buffer->size = size;
+	buffer->mirror = malloc(npages);
+	ASSERT_NE(buffer->mirror, NULL);
+
+	/* Reserve a range of addresses. */
+	buffer->ptr = mmap(NULL, size,
+			   PROT_READ | PROT_WRITE,
+			   MAP_PRIVATE | MAP_ANONYMOUS,
+			   buffer->fd, 0);
+	ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+	/* Initialize buffer in system memory. */
+	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+		ptr[i] = i;
+
+	/* Make region read-only. */
+	ret = mprotect(buffer->ptr, size, PROT_READ);
+	ASSERT_EQ(ret, 0);
+
+	/* Simulate device 0 reading system memory. */
+	ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	/* Simulate device 1 reading system memory. */
+	ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
+	ASSERT_EQ(ret, 0);
+	ASSERT_EQ(buffer->cpages, npages);
+	ASSERT_EQ(buffer->faults, 1);
+
+	/* Check what the device read. */
+	for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+		ASSERT_EQ(ptr[i], i);
+
+	/* Punch a hole after the first page address. */
+	ret = munmap(buffer->ptr + self->page_size, self->page_size);
+	ASSERT_EQ(ret, 0);
+
+	hmm_buffer_free(buffer);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
new file mode 100644
index 000000000000..51b89cedd09d
--- /dev/null
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -0,0 +1,1035 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#ifndef MADV_PAGEOUT
+#define MADV_PAGEOUT 21
+#endif
+
+#define BASE_ADDR ((void *)(1UL << 30))
+static unsigned long hpage_pmd_size;
+static unsigned long page_size;
+static int hpage_pmd_nr;
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define PID_SMAPS "/proc/self/smaps"
+
+enum thp_enabled {
+	THP_ALWAYS,
+	THP_MADVISE,
+	THP_NEVER,
+};
+
+static const char *thp_enabled_strings[] = {
+	"always",
+	"madvise",
+	"never",
+	NULL
+};
+
+enum thp_defrag {
+	THP_DEFRAG_ALWAYS,
+	THP_DEFRAG_DEFER,
+	THP_DEFRAG_DEFER_MADVISE,
+	THP_DEFRAG_MADVISE,
+	THP_DEFRAG_NEVER,
+};
+
+static const char *thp_defrag_strings[] = {
+	"always",
+	"defer",
+	"defer+madvise",
+	"madvise",
+	"never",
+	NULL
+};
+
+enum shmem_enabled {
+	SHMEM_ALWAYS,
+	SHMEM_WITHIN_SIZE,
+	SHMEM_ADVISE,
+	SHMEM_NEVER,
+	SHMEM_DENY,
+	SHMEM_FORCE,
+};
+
+static const char *shmem_enabled_strings[] = {
+	"always",
+	"within_size",
+	"advise",
+	"never",
+	"deny",
+	"force",
+	NULL
+};
+
+struct khugepaged_settings {
+	bool defrag;
+	unsigned int alloc_sleep_millisecs;
+	unsigned int scan_sleep_millisecs;
+	unsigned int max_ptes_none;
+	unsigned int max_ptes_swap;
+	unsigned int max_ptes_shared;
+	unsigned long pages_to_scan;
+};
+
+struct settings {
+	enum thp_enabled thp_enabled;
+	enum thp_defrag thp_defrag;
+	enum shmem_enabled shmem_enabled;
+	bool debug_cow;
+	bool use_zero_page;
+	struct khugepaged_settings khugepaged;
+};
+
+static struct settings default_settings = {
+	.thp_enabled = THP_MADVISE,
+	.thp_defrag = THP_DEFRAG_ALWAYS,
+	.shmem_enabled = SHMEM_NEVER,
+	.debug_cow = 0,
+	.use_zero_page = 0,
+	.khugepaged = {
+		.defrag = 1,
+		.alloc_sleep_millisecs = 10,
+		.scan_sleep_millisecs = 10,
+	},
+};
+
+static struct settings saved_settings;
+static bool skip_settings_restore;
+
+static int exit_status;
+
+static void success(const char *msg)
+{
+	printf(" \e[32m%s\e[0m\n", msg);
+}
+
+static void fail(const char *msg)
+{
+	printf(" \e[31m%s\e[0m\n", msg);
+	exit_status++;
+}
+
+static int read_file(const char *path, char *buf, size_t buflen)
+{
+	int fd;
+	ssize_t numread;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1)
+		return 0;
+
+	numread = read(fd, buf, buflen - 1);
+	if (numread < 1) {
+		close(fd);
+		return 0;
+	}
+
+	buf[numread] = '\0';
+	close(fd);
+
+	return (unsigned int) numread;
+}
+
+static int write_file(const char *path, const char *buf, size_t buflen)
+{
+	int fd;
+	ssize_t numwritten;
+
+	fd = open(path, O_WRONLY);
+	if (fd == -1)
+		return 0;
+
+	numwritten = write(fd, buf, buflen - 1);
+	close(fd);
+	if (numwritten < 1)
+		return 0;
+
+	return (unsigned int) numwritten;
+}
+
+static int read_string(const char *name, const char *strings[])
+{
+	char path[PATH_MAX];
+	char buf[256];
+	char *c;
+	int ret;
+
+	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+	if (ret >= PATH_MAX) {
+		printf("%s: Pathname is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+	if (!read_file(path, buf, sizeof(buf))) {
+		perror(path);
+		exit(EXIT_FAILURE);
+	}
+
+	c = strchr(buf, '[');
+	if (!c) {
+		printf("%s: Parse failure\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+	c++;
+	memmove(buf, c, sizeof(buf) - (c - buf));
+
+	c = strchr(buf, ']');
+	if (!c) {
+		printf("%s: Parse failure\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+	*c = '\0';
+
+	ret = 0;
+	while (strings[ret]) {
+		if (!strcmp(strings[ret], buf))
+			return ret;
+		ret++;
+	}
+
+	printf("Failed to parse %s\n", name);
+	exit(EXIT_FAILURE);
+}
+
+static void write_string(const char *name, const char *val)
+{
+	char path[PATH_MAX];
+	int ret;
+
+	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+	if (ret >= PATH_MAX) {
+		printf("%s: Pathname is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+	if (!write_file(path, val, strlen(val) + 1)) {
+		perror(path);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static const unsigned long read_num(const char *name)
+{
+	char path[PATH_MAX];
+	char buf[21];
+	int ret;
+
+	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+	if (ret >= PATH_MAX) {
+		printf("%s: Pathname is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+	ret = read_file(path, buf, sizeof(buf));
+	if (ret < 0) {
+		perror("read_file(read_num)");
+		exit(EXIT_FAILURE);
+	}
+
+	return strtoul(buf, NULL, 10);
+}
+
+static void write_num(const char *name, unsigned long num)
+{
+	char path[PATH_MAX];
+	char buf[21];
+	int ret;
+
+	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+	if (ret >= PATH_MAX) {
+		printf("%s: Pathname is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+	sprintf(buf, "%ld", num);
+	if (!write_file(path, buf, strlen(buf) + 1)) {
+		perror(path);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void write_settings(struct settings *settings)
+{
+	struct khugepaged_settings *khugepaged = &settings->khugepaged;
+
+	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+	write_string("shmem_enabled",
+			shmem_enabled_strings[settings->shmem_enabled]);
+	write_num("debug_cow", settings->debug_cow);
+	write_num("use_zero_page", settings->use_zero_page);
+
+	write_num("khugepaged/defrag", khugepaged->defrag);
+	write_num("khugepaged/alloc_sleep_millisecs",
+			khugepaged->alloc_sleep_millisecs);
+	write_num("khugepaged/scan_sleep_millisecs",
+			khugepaged->scan_sleep_millisecs);
+	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+}
+
+static void restore_settings(int sig)
+{
+	if (skip_settings_restore)
+		goto out;
+
+	printf("Restore THP and khugepaged settings...");
+	write_settings(&saved_settings);
+	success("OK");
+	if (sig)
+		exit(EXIT_FAILURE);
+out:
+	exit(exit_status);
+}
+
+static void save_settings(void)
+{
+	printf("Save THP and khugepaged settings...");
+	saved_settings = (struct settings) {
+		.thp_enabled = read_string("enabled", thp_enabled_strings),
+		.thp_defrag = read_string("defrag", thp_defrag_strings),
+		.shmem_enabled =
+			read_string("shmem_enabled", shmem_enabled_strings),
+		.debug_cow = read_num("debug_cow"),
+		.use_zero_page = read_num("use_zero_page"),
+	};
+	saved_settings.khugepaged = (struct khugepaged_settings) {
+		.defrag = read_num("khugepaged/defrag"),
+		.alloc_sleep_millisecs =
+			read_num("khugepaged/alloc_sleep_millisecs"),
+		.scan_sleep_millisecs =
+			read_num("khugepaged/scan_sleep_millisecs"),
+		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
+		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
+		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
+		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
+	};
+	success("OK");
+
+	signal(SIGTERM, restore_settings);
+	signal(SIGINT, restore_settings);
+	signal(SIGHUP, restore_settings);
+	signal(SIGQUIT, restore_settings);
+}
+
+static void adjust_settings(void)
+{
+
+	printf("Adjust settings...");
+	write_settings(&default_settings);
+	success("OK");
+}
+
+#define MAX_LINE_LENGTH 500
+
+static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
+{
+	while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+		if (!strncmp(buf, pattern, strlen(pattern)))
+			return true;
+	}
+	return false;
+}
+
+static bool check_huge(void *addr)
+{
+	bool thp = false;
+	int ret;
+	FILE *fp;
+	char buffer[MAX_LINE_LENGTH];
+	char addr_pattern[MAX_LINE_LENGTH];
+
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+		       (unsigned long) addr);
+	if (ret >= MAX_LINE_LENGTH) {
+		printf("%s: Pattern is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+
+	fp = fopen(PID_SMAPS, "r");
+	if (!fp) {
+		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+		exit(EXIT_FAILURE);
+	}
+	if (!check_for_pattern(fp, addr_pattern, buffer))
+		goto err_out;
+
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
+		       hpage_pmd_size >> 10);
+	if (ret >= MAX_LINE_LENGTH) {
+		printf("%s: Pattern is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+	/*
+	 * Fetch the AnonHugePages: in the same block and check whether it got
+	 * the expected number of hugeepages next.
+	 */
+	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+		goto err_out;
+
+	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+		goto err_out;
+
+	thp = true;
+err_out:
+	fclose(fp);
+	return thp;
+}
+
+
+static bool check_swap(void *addr, unsigned long size)
+{
+	bool swap = false;
+	int ret;
+	FILE *fp;
+	char buffer[MAX_LINE_LENGTH];
+	char addr_pattern[MAX_LINE_LENGTH];
+
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+		       (unsigned long) addr);
+	if (ret >= MAX_LINE_LENGTH) {
+		printf("%s: Pattern is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+
+
+	fp = fopen(PID_SMAPS, "r");
+	if (!fp) {
+		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+		exit(EXIT_FAILURE);
+	}
+	if (!check_for_pattern(fp, addr_pattern, buffer))
+		goto err_out;
+
+	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
+		       size >> 10);
+	if (ret >= MAX_LINE_LENGTH) {
+		printf("%s: Pattern is too long\n", __func__);
+		exit(EXIT_FAILURE);
+	}
+	/*
+	 * Fetch the Swap: in the same block and check whether it got
+	 * the expected number of hugeepages next.
+	 */
+	if (!check_for_pattern(fp, "Swap:", buffer))
+		goto err_out;
+
+	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+		goto err_out;
+
+	swap = true;
+err_out:
+	fclose(fp);
+	return swap;
+}
+
+static void *alloc_mapping(void)
+{
+	void *p;
+
+	p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p != BASE_ADDR) {
+		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+		exit(EXIT_FAILURE);
+	}
+
+	return p;
+}
+
+static void fill_memory(int *p, unsigned long start, unsigned long end)
+{
+	int i;
+
+	for (i = start / page_size; i < end / page_size; i++)
+		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
+}
+
+static void validate_memory(int *p, unsigned long start, unsigned long end)
+{
+	int i;
+
+	for (i = start / page_size; i < end / page_size; i++) {
+		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
+			printf("Page %d is corrupted: %#x\n",
+					i, p[i * page_size / sizeof(*p)]);
+			exit(EXIT_FAILURE);
+		}
+	}
+}
+
+#define TICK 500000
+static bool wait_for_scan(const char *msg, char *p)
+{
+	int full_scans;
+	int timeout = 6; /* 3 seconds */
+
+	/* Sanity check */
+	if (check_huge(p)) {
+		printf("Unexpected huge page\n");
+		exit(EXIT_FAILURE);
+	}
+
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+
+	/* Wait until the second full_scan completed */
+	full_scans = read_num("khugepaged/full_scans") + 2;
+
+	printf("%s...", msg);
+	while (timeout--) {
+		if (check_huge(p))
+			break;
+		if (read_num("khugepaged/full_scans") >= full_scans)
+			break;
+		printf(".");
+		usleep(TICK);
+	}
+
+	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+	return !timeout;
+}
+
+static void alloc_at_fault(void)
+{
+	struct settings settings = default_settings;
+	char *p;
+
+	settings.thp_enabled = THP_ALWAYS;
+	write_settings(&settings);
+
+	p = alloc_mapping();
+	*p = 1;
+	printf("Allocate huge page on fault...");
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	write_settings(&default_settings);
+
+	madvise(p, page_size, MADV_DONTNEED);
+	printf("Split huge PMD on MADV_DONTNEED...");
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+	fill_memory(p, 0, hpage_pmd_size);
+	if (wait_for_scan("Collapse fully populated PTE table", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_empty(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+	if (wait_for_scan("Do not collapse empty PTE table", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		fail("Fail");
+	else
+		success("OK");
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+	fill_memory(p, 0, page_size);
+	if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, page_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_none(void)
+{
+	int max_ptes_none = hpage_pmd_nr / 2;
+	struct settings settings = default_settings;
+	void *p;
+
+	settings.khugepaged.max_ptes_none = max_ptes_none;
+	write_settings(&settings);
+
+	p = alloc_mapping();
+
+	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+	if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		fail("Fail");
+	else
+		success("OK");
+	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+
+	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+	if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+
+	munmap(p, hpage_pmd_size);
+	write_settings(&default_settings);
+}
+
+static void collapse_swapin_single_pte(void)
+{
+	void *p;
+	p = alloc_mapping();
+	fill_memory(p, 0, hpage_pmd_size);
+
+	printf("Swapout one page...");
+	if (madvise(p, page_size, MADV_PAGEOUT)) {
+		perror("madvise(MADV_PAGEOUT)");
+		exit(EXIT_FAILURE);
+	}
+	if (check_swap(p, page_size)) {
+		success("OK");
+	} else {
+		fail("Fail");
+		goto out;
+	}
+
+	if (wait_for_scan("Collapse with swapping in single PTE entry", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+out:
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_swap(void)
+{
+	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
+	void *p;
+
+	p = alloc_mapping();
+
+	fill_memory(p, 0, hpage_pmd_size);
+	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
+		perror("madvise(MADV_PAGEOUT)");
+		exit(EXIT_FAILURE);
+	}
+	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
+		success("OK");
+	} else {
+		fail("Fail");
+		goto out;
+	}
+
+	if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		fail("Fail");
+	else
+		success("OK");
+	validate_memory(p, 0, hpage_pmd_size);
+
+	fill_memory(p, 0, hpage_pmd_size);
+	printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
+	if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+		perror("madvise(MADV_PAGEOUT)");
+		exit(EXIT_FAILURE);
+	}
+	if (check_swap(p, max_ptes_swap * page_size)) {
+		success("OK");
+	} else {
+		fail("Fail");
+		goto out;
+	}
+
+	if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+out:
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry_compound(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+
+	printf("Allocate huge page...");
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+	fill_memory(p, 0, hpage_pmd_size);
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+	printf("Split huge page leaving single PTE mapping compound page...");
+	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, page_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full_of_compound(void)
+{
+	void *p;
+
+	p = alloc_mapping();
+
+	printf("Allocate huge page...");
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+	fill_memory(p, 0, hpage_pmd_size);
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	printf("Split huge page leaving single PTE page table full of compound pages...");
+	madvise(p, page_size, MADV_NOHUGEPAGE);
+	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	if (wait_for_scan("Collapse PTE table full of compound pages", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_compound_extreme(void)
+{
+	void *p;
+	int i;
+
+	p = alloc_mapping();
+	for (i = 0; i < hpage_pmd_nr; i++) {
+		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
+				i + 1, hpage_pmd_nr);
+
+		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
+		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
+		if (!check_huge(BASE_ADDR)) {
+			printf("Failed to allocate huge page\n");
+			exit(EXIT_FAILURE);
+		}
+		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+		p = mremap(BASE_ADDR - i * page_size,
+				i * page_size + hpage_pmd_size,
+				(i + 1) * page_size,
+				MREMAP_MAYMOVE | MREMAP_FIXED,
+				BASE_ADDR + 2 * hpage_pmd_size);
+		if (p == MAP_FAILED) {
+			perror("mremap+unmap");
+			exit(EXIT_FAILURE);
+		}
+
+		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
+				(i + 1) * page_size,
+				(i + 1) * page_size + hpage_pmd_size,
+				MREMAP_MAYMOVE | MREMAP_FIXED,
+				BASE_ADDR - (i + 1) * page_size);
+		if (p == MAP_FAILED) {
+			perror("mremap+alloc");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	munmap(BASE_ADDR, hpage_pmd_size);
+	fill_memory(p, 0, hpage_pmd_size);
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
+		fail("Timeout");
+	else if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	validate_memory(p, 0, hpage_pmd_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork(void)
+{
+	int wstatus;
+	void *p;
+
+	p = alloc_mapping();
+
+	printf("Allocate small page...");
+	fill_memory(p, 0, page_size);
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	printf("Share small page over fork()...");
+	if (!fork()) {
+		/* Do not touch settings on child exit */
+		skip_settings_restore = true;
+		exit_status = 0;
+
+		if (!check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		fill_memory(p, page_size, 2 * page_size);
+
+		if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
+			fail("Timeout");
+		else if (check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		validate_memory(p, 0, page_size);
+		munmap(p, hpage_pmd_size);
+		exit(exit_status);
+	}
+
+	wait(&wstatus);
+	exit_status += WEXITSTATUS(wstatus);
+
+	printf("Check if parent still has small page...");
+	if (!check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, page_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork_compound(void)
+{
+	int wstatus;
+	void *p;
+
+	p = alloc_mapping();
+
+	printf("Allocate huge page...");
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+	fill_memory(p, 0, hpage_pmd_size);
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	printf("Share huge page over fork()...");
+	if (!fork()) {
+		/* Do not touch settings on child exit */
+		skip_settings_restore = true;
+		exit_status = 0;
+
+		if (check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		printf("Split huge page PMD in child process...");
+		madvise(p, page_size, MADV_NOHUGEPAGE);
+		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+		if (!check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+		fill_memory(p, 0, page_size);
+
+		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+		if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
+			fail("Timeout");
+		else if (check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+		write_num("khugepaged/max_ptes_shared",
+				default_settings.khugepaged.max_ptes_shared);
+
+		validate_memory(p, 0, hpage_pmd_size);
+		munmap(p, hpage_pmd_size);
+		exit(exit_status);
+	}
+
+	wait(&wstatus);
+	exit_status += WEXITSTATUS(wstatus);
+
+	printf("Check if parent still has huge page...");
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+	munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_shared()
+{
+	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
+	int wstatus;
+	void *p;
+
+	p = alloc_mapping();
+
+	printf("Allocate huge page...");
+	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+	fill_memory(p, 0, hpage_pmd_size);
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+
+	printf("Share huge page over fork()...");
+	if (!fork()) {
+		/* Do not touch settings on child exit */
+		skip_settings_restore = true;
+		exit_status = 0;
+
+		if (check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		printf("Trigger CoW on page %d of %d...",
+				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
+		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+		if (!check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
+			fail("Timeout");
+		else if (!check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		printf("Trigger CoW on page %d of %d...",
+				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
+		if (!check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+
+		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
+			fail("Timeout");
+		else if (check_huge(p))
+			success("OK");
+		else
+			fail("Fail");
+
+		validate_memory(p, 0, hpage_pmd_size);
+		munmap(p, hpage_pmd_size);
+		exit(exit_status);
+	}
+
+	wait(&wstatus);
+	exit_status += WEXITSTATUS(wstatus);
+
+	printf("Check if parent still has huge page...");
+	if (check_huge(p))
+		success("OK");
+	else
+		fail("Fail");
+	validate_memory(p, 0, hpage_pmd_size);
+	munmap(p, hpage_pmd_size);
+}
+
+int main(void)
+{
+	setbuf(stdout, NULL);
+
+	page_size = getpagesize();
+	hpage_pmd_size = read_num("hpage_pmd_size");
+	hpage_pmd_nr = hpage_pmd_size / page_size;
+
+	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
+	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
+	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
+	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+
+	save_settings();
+	adjust_settings();
+
+	alloc_at_fault();
+	collapse_full();
+	collapse_empty();
+	collapse_single_pte_entry();
+	collapse_max_ptes_none();
+	collapse_swapin_single_pte();
+	collapse_max_ptes_swap();
+	collapse_single_pte_entry_compound();
+	collapse_full_of_compound();
+	collapse_compound_extreme();
+	collapse_fork();
+	collapse_fork_compound();
+	collapse_max_ptes_shared();
+
+	restore_settings(0);
+}
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c
index ee06cb0b9efb..3a7b5ef0b0c6 100644
--- a/tools/testing/selftests/vm/mremap_dontunmap.c
+++ b/tools/testing/selftests/vm/mremap_dontunmap.c
@@ -11,7 +11,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdlib.h>
 #include <unistd.h>
 
 #include "../kselftest.h"
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
new file mode 100644
index 000000000000..622a85848f61
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+/* Define some kernel-like types */
+#define  u8 __u8
+#define u16 __u16
+#define u32 __u32
+#define u64 __u64
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern int test_nr;
+extern int iteration_nr;
+
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
+static inline void sigsafe_printf(const char *format, ...)
+{
+	va_list ap;
+
+	if (!dprint_in_signal) {
+		va_start(ap, format);
+		vprintf(format, ap);
+		va_end(ap);
+	} else {
+		int ret;
+		/*
+		 * No printf() functions are signal-safe.
+		 * They deadlock easily. Write the format
+		 * string to get some output, even if
+		 * incomplete.
+		 */
+		ret = write(1, format, strlen(format));
+		if (ret < 0)
+			exit(1);
+	}
+}
+#define dprintf_level(level, args...) do {	\
+	if (level <= DEBUG_LEVEL)		\
+		sigsafe_printf(args);		\
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do {		\
+	if (!(condition)) {			\
+		dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+				__FILE__, __LINE__,	\
+				test_nr, iteration_nr);	\
+		dprintf0("errno at assert: %d", errno);	\
+		abort_hooks();			\
+		exit(__LINE__);			\
+	}					\
+} while (0)
+
+__attribute__((noinline)) int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
+int sys_pkey_free(unsigned long pkey);
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+		unsigned long pkey);
+void record_pkey_malloc(void *ptr, long size, int prot);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+#include "pkey-x86.h"
+#elif defined(__powerpc64__) /* arch */
+#include "pkey-powerpc.h"
+#else /* arch */
+#error Architecture not supported
+#endif /* arch */
+
+#define PKEY_MASK	(PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+	u32 shift = pkey_bit_position(pkey);
+	/* mask out bits from pkey in old value */
+	reg &= ~((u64)PKEY_MASK << shift);
+	/* OR in new bits for pkey */
+	reg |= (flags & PKEY_MASK) << shift;
+	return reg;
+}
+
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+	u32 shift = pkey_bit_position(pkey);
+	/*
+	 * shift down the relevant bits to the lowest two, then
+	 * mask off all the other higher bits
+	 */
+	return ((reg >> shift) & PKEY_MASK);
+}
+
+extern u64 shadow_pkey_reg;
+
+static inline u64 _read_pkey_reg(int line)
+{
+	u64 pkey_reg = __read_pkey_reg();
+
+	dprintf4("read_pkey_reg(line=%d) pkey_reg: %016llx"
+			" shadow: %016llx\n",
+			line, pkey_reg, shadow_pkey_reg);
+	assert(pkey_reg == shadow_pkey_reg);
+
+	return pkey_reg;
+}
+
+#define read_pkey_reg() _read_pkey_reg(__LINE__)
+
+static inline void write_pkey_reg(u64 pkey_reg)
+{
+	dprintf4("%s() changing %016llx to %016llx\n", __func__,
+			__read_pkey_reg(), pkey_reg);
+	/* will do the shadow check for us: */
+	read_pkey_reg();
+	__write_pkey_reg(pkey_reg);
+	shadow_pkey_reg = pkey_reg;
+	dprintf4("%s(%016llx) pkey_reg: %016llx\n", __func__,
+			pkey_reg, __read_pkey_reg());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKEY register between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+	u64 pkey_reg = read_pkey_reg();
+	int bit = pkey * 2;
+
+	if (do_allow)
+		pkey_reg &= (1<<bit);
+	else
+		pkey_reg |= (1<<bit);
+
+	dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+	write_pkey_reg(pkey_reg);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+	u64 pkey_reg = read_pkey_reg();
+	int bit = pkey * 2 + 1;
+
+	if (do_allow_write)
+		pkey_reg &= (1<<bit);
+	else
+		pkey_reg |= (1<<bit);
+
+	write_pkey_reg(pkey_reg);
+	dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+}
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to)	\
+	((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to)	\
+	((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...)     #x
+#define __stringify(x...)       __stringify_1(x)
+
+static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
+{
+#ifdef si_pkey
+	return &si->si_pkey;
+#else
+	return (u32 *)(((u8 *)si) + si_pkey_offset);
+#endif
+}
+
+static inline int kernel_has_pkeys(void)
+{
+	/* try allocating a key and see if it succeeds */
+	int ret = sys_pkey_alloc(0, 0);
+	if (ret <= 0) {
+		return 0;
+	}
+	sys_pkey_free(ret);
+	return 1;
+}
+
+static inline int is_pkeys_supported(void)
+{
+	/* check if the cpu supports pkeys */
+	if (!cpu_has_pkeys()) {
+		dprintf1("SKIP: %s: no CPU support\n", __func__);
+		return 0;
+	}
+
+	/* check if the kernel supports pkeys */
+	if (!kernel_has_pkeys()) {
+		dprintf1("SKIP: %s: no kernel support\n", __func__);
+		return 0;
+	}
+
+	return 1;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h
new file mode 100644
index 000000000000..1ebb586b2fbc
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-powerpc.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_POWERPC_H
+#define _PKEYS_POWERPC_H
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	386
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		384
+# define SYS_pkey_free		385
+#endif
+#define REG_IP_IDX		PT_NIP
+#define REG_TRAPNO		PT_TRAP
+#define gregs			gp_regs
+#define fpregs			fp_regs
+#define si_pkey_offset		0x20
+
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS	0x3  /* disable read and write */
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE	0x2
+
+#define NR_PKEYS		32
+#define NR_RESERVED_PKEYS_4K	27 /* pkey-0, pkey-1, exec-only-pkey
+				      and 24 other keys that cannot be
+				      represented in the PTE */
+#define NR_RESERVED_PKEYS_64K_3KEYS	3 /* PowerNV and KVM: pkey-0,
+					     pkey-1 and exec-only key */
+#define NR_RESERVED_PKEYS_64K_4KEYS	4 /* PowerVM: pkey-0, pkey-1,
+					     pkey-31 and exec-only key */
+#define PKEY_BITS_PER_PKEY	2
+#define HPAGE_SIZE		(1UL << 24)
+#define PAGE_SIZE		sysconf(_SC_PAGESIZE)
+
+static inline u32 pkey_bit_position(int pkey)
+{
+	return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+	u64 pkey_reg;
+
+	asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg));
+
+	return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+	u64 amr = pkey_reg;
+
+	dprintf4("%s() changing %016llx to %016llx\n",
+			 __func__, __read_pkey_reg(), pkey_reg);
+
+	asm volatile("isync; mtspr 0xd, %0; isync"
+		     : : "r" ((unsigned long)(amr)) : "memory");
+
+	dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+			__func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+	/* No simple way to determine this */
+	return 1;
+}
+
+static inline bool arch_is_powervm()
+{
+	struct stat buf;
+
+	if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) &&
+	    (stat("/sys/firmware/devicetree/base/hmc-managed?", &buf) == 0) &&
+	    (stat("/sys/firmware/devicetree/base/chosen/qemu,graphic-width", &buf) == -1) )
+		return true;
+
+	return false;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+	if (sysconf(_SC_PAGESIZE) == 4096)
+		return NR_RESERVED_PKEYS_4K;
+	else
+		if (arch_is_powervm())
+			return NR_RESERVED_PKEYS_64K_4KEYS;
+		else
+			return NR_RESERVED_PKEYS_64K_3KEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+	/*
+	 * powerpc does not allow userspace to change permissions of exec-only
+	 * keys since those keys are not allocated by userspace. The signal
+	 * handler wont be able to reset the permissions, which means the code
+	 * will infinitely continue to segfault here.
+	 */
+	return;
+}
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+	void *ptr;
+	int ret;
+
+	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+			size, prot, pkey);
+	pkey_assert(pkey < NR_PKEYS);
+	ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	pkey_assert(ptr != (void *)-1);
+
+	ret = syscall(__NR_subpage_prot, ptr, size, NULL);
+	if (ret) {
+		perror("subpage_perm");
+		return PTR_ERR_ENOTSUP;
+	}
+
+	ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+	pkey_assert(!ret);
+	record_pkey_malloc(ptr, size, prot);
+
+	dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+	return ptr;
+}
+
+#endif /* _PKEYS_POWERPC_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
new file mode 100644
index 000000000000..3be20f5d5275
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_X86_H
+#define _PKEYS_X86_H
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		381
+# define SYS_pkey_free		382
+#endif
+
+#define REG_IP_IDX		REG_EIP
+#define si_pkey_offset		0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		330
+# define SYS_pkey_free		331
+#endif
+
+#define REG_IP_IDX		REG_RIP
+#define si_pkey_offset		0x20
+
+#endif
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS	0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE	0x2
+#endif
+
+#define NR_PKEYS		16
+#define NR_RESERVED_PKEYS	2 /* pkey-0 and exec-only-pkey */
+#define PKEY_BITS_PER_PKEY	2
+#define HPAGE_SIZE		(1UL<<21)
+#define PAGE_SIZE		4096
+#define MB			(1<<20)
+
+static inline void __page_o_noops(void)
+{
+	/* 8-bytes of instruction * 512 bytes = 1 page */
+	asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+	unsigned int eax, edx;
+	unsigned int ecx = 0;
+	unsigned pkey_reg;
+
+	asm volatile(".byte 0x0f,0x01,0xee\n\t"
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (ecx));
+	pkey_reg = eax;
+	return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+	unsigned int eax = pkey_reg;
+	unsigned int ecx = 0;
+	unsigned int edx = 0;
+
+	dprintf4("%s() changing %016llx to %016llx\n", __func__,
+			__read_pkey_reg(), pkey_reg);
+	asm volatile(".byte 0x0f,0x01,0xef\n\t"
+		     : : "a" (eax), "c" (ecx), "d" (edx));
+	assert(pkey_reg == __read_pkey_reg());
+}
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+		unsigned int *ecx, unsigned int *edx)
+{
+	/* ecx is often an input as well as an output. */
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU        (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE      (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pkeys(void)
+{
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+
+	eax = 0x7;
+	ecx = 0x0;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (!(ecx & X86_FEATURE_PKU)) {
+		dprintf2("cpu does not have PKU\n");
+		return 0;
+	}
+	if (!(ecx & X86_FEATURE_OSPKE)) {
+		dprintf2("cpu does not have OSPKE\n");
+		return 0;
+	}
+	return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+	return pkey * PKEY_BITS_PER_PKEY;
+}
+
+#define XSTATE_PKEY_BIT	(9)
+#define XSTATE_PKEY	0x200
+
+int pkey_reg_xstate_offset(void)
+{
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+	int xstate_offset;
+	int xstate_size;
+	unsigned long XSTATE_CPUID = 0xd;
+	int leaf;
+
+	/* assume that XSTATE_PKEY is set in XCR0 */
+	leaf = XSTATE_PKEY_BIT;
+	{
+		eax = XSTATE_CPUID;
+		ecx = leaf;
+		__cpuid(&eax, &ebx, &ecx, &edx);
+
+		if (leaf == XSTATE_PKEY_BIT) {
+			xstate_offset = ebx;
+			xstate_size = eax;
+		}
+	}
+
+	if (xstate_size == 0) {
+		printf("could not find size/offset of PKEY in xsave state\n");
+		return 0;
+	}
+
+	return xstate_offset;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+	return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+	int ptr_contents;
+
+	ptr_contents = read_ptr(p1);
+	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+	expected_pkey_fault(pkey);
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+	return PTR_ERR_ENOTSUP;
+}
+
+#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 480995bceefa..fc19addcb5c8 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt)
  *
  * There are examples in here of:
  *  * how to set protection keys on memory
- *  * how to set/clear bits in PKRU (the rights register)
- *  * how to handle SEGV_PKRU signals and extract pkey-relevant
+ *  * how to set/clear bits in pkey registers (the rights register)
+ *  * how to handle SEGV_PKUERR signals and extract pkey-relevant
  *    information from the siginfo
  *
  * Things to add:
@@ -22,8 +22,10 @@
  *	gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
  */
 #define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
 #include <errno.h>
 #include <linux/futex.h>
+#include <time.h>
 #include <sys/time.h>
 #include <sys/syscall.h>
 #include <string.h>
@@ -48,34 +50,10 @@
 int iteration_nr = 1;
 int test_nr;
 
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE	(1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to)	((typeof(p))ALIGN_UP((unsigned long)(p),	ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to)	((typeof(p))ALIGN_DOWN((unsigned long)(p),	ptr_align_to))
-#define __stringify_1(x...)     #x
-#define __stringify(x...)       __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
+u64 shadow_pkey_reg;
 int dprint_in_signal;
 char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
 
-extern void abort_hooks(void);
-#define pkey_assert(condition) do {		\
-	if (!(condition)) {			\
-		dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
-				__FILE__, __LINE__,	\
-				test_nr, iteration_nr);	\
-		dprintf0("errno at assert: %d", errno);	\
-		abort_hooks();			\
-		exit(__LINE__);			\
-	}					\
-} while (0)
-
 void cat_into_file(char *str, char *file)
 {
 	int fd = open(file, O_RDWR);
@@ -158,12 +136,6 @@ void abort_hooks(void)
 #endif
 }
 
-static inline void __page_o_noops(void)
-{
-	/* 8-bytes of instruction * 512 bytes = 1 page */
-	asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
 /*
  * This attempts to have roughly a page of instructions followed by a few
  * instructions that do a write, and another page of instructions.  That
@@ -174,7 +146,12 @@ static inline void __page_o_noops(void)
  * will then fault, which makes sure that the fault code handles
  * execute-only memory properly.
  */
+#ifdef __powerpc64__
+/* This way, both 4K and 64K alignment are maintained */
+__attribute__((__aligned__(65536)))
+#else
 __attribute__((__aligned__(PAGE_SIZE)))
+#endif
 void lots_o_noops_around_write(int *write_to_me)
 {
 	dprintf3("running %s()\n", __func__);
@@ -186,51 +163,134 @@ void lots_o_noops_around_write(int *write_to_me)
 	dprintf3("%s() done\n", __func__);
 }
 
-/* Define some kernel-like types */
-#define  u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
+void dump_mem(void *dumpme, int len_bytes)
+{
+	char *c = (void *)dumpme;
+	int i;
 
-#ifdef __i386__
+	for (i = 0; i < len_bytes; i += sizeof(u64)) {
+		u64 *ptr = (u64 *)(c + i);
+		dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
+	}
+}
 
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key	380
-#endif
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+	u64 pkey_reg = __read_pkey_reg();
 
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc		381
-# define SYS_pkey_free		382
-#endif
+	dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+			__func__, pkey, flags, 0, 0);
+	dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg);
 
-#define REG_IP_IDX		REG_EIP
-#define si_pkey_offset		0x14
+	return (u32) get_pkey_bits(pkey_reg, pkey);
+}
 
-#else
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+	u64 old_pkey_reg = __read_pkey_reg();
+	u64 new_pkey_reg;
 
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key	329
-#endif
+	/* make sure that 'rights' only contains the bits we expect: */
+	assert(!(rights & ~mask));
 
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc		330
-# define SYS_pkey_free		331
-#endif
+	/* modify bits accordingly in old pkey_reg and assign it */
+	new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights);
 
-#define REG_IP_IDX		REG_RIP
-#define si_pkey_offset		0x20
+	__write_pkey_reg(new_pkey_reg);
 
-#endif
+	dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
+		" pkey_reg now: %016llx old_pkey_reg: %016llx\n",
+		__func__, pkey, rights, flags, 0, __read_pkey_reg(),
+		old_pkey_reg);
+	return 0;
+}
 
-void dump_mem(void *dumpme, int len_bytes)
+void pkey_disable_set(int pkey, int flags)
 {
-	char *c = (void *)dumpme;
-	int i;
+	unsigned long syscall_flags = 0;
+	int ret;
+	int pkey_rights;
+	u64 orig_pkey_reg = read_pkey_reg();
 
-	for (i = 0; i < len_bytes; i += sizeof(u64)) {
-		u64 *ptr = (u64 *)(c + i);
-		dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
-	}
+	dprintf1("START->%s(%d, 0x%x)\n", __func__,
+		pkey, flags);
+	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+
+	pkey_assert(pkey_rights >= 0);
+
+	pkey_rights |= flags;
+
+	ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+	assert(!ret);
+	/* pkey_reg and flags have the same format */
+	shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+	dprintf1("%s(%d) shadow: 0x%016llx\n",
+		__func__, pkey, shadow_pkey_reg);
+
+	pkey_assert(ret >= 0);
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+
+	dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
+		__func__, pkey, read_pkey_reg());
+	if (flags)
+		pkey_assert(read_pkey_reg() >= orig_pkey_reg);
+	dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+		pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+	unsigned long syscall_flags = 0;
+	int ret;
+	int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	u64 orig_pkey_reg = read_pkey_reg();
+
+	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+	pkey_assert(pkey_rights >= 0);
+
+	pkey_rights &= ~flags;
+
+	ret = hw_pkey_set(pkey, pkey_rights, 0);
+	shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+	pkey_assert(ret >= 0);
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+
+	dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
+			pkey, read_pkey_reg());
+	if (flags)
+		assert(read_pkey_reg() <= orig_pkey_reg);
+}
+
+void pkey_write_allow(int pkey)
+{
+	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
 }
 
 /* Failed address bound checks: */
@@ -255,7 +315,7 @@ static char *si_code_str(int si_code)
 	return "UNKNOWN";
 }
 
-int pkru_faults;
+int pkey_faults;
 int last_si_pkey = -1;
 void signal_handler(int signum, siginfo_t *si, void *vucontext)
 {
@@ -263,24 +323,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	int trapno;
 	unsigned long ip;
 	char *fpregs;
-	u32 *pkru_ptr;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+	u32 *pkey_reg_ptr;
+	int pkey_reg_offset;
+#endif /* arch */
 	u64 siginfo_pkey;
 	u32 *si_pkey_ptr;
-	int pkru_offset;
-	fpregset_t fpregset;
 
 	dprint_in_signal = 1;
 	dprintf1(">>>>===============SIGSEGV============================\n");
-	dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
-			__rdpkru(), shadow_pkru);
+	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+			__func__, __LINE__,
+			__read_pkey_reg(), shadow_pkey_reg);
 
 	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
 	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
-	fpregset = uctxt->uc_mcontext.fpregs;
-	fpregs = (void *)fpregset;
+	fpregs = (char *) uctxt->uc_mcontext.fpregs;
 
-	dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
-			trapno, ip, si_code_str(si->si_code), si->si_code);
+	dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
+			__func__, trapno, ip, si_code_str(si->si_code),
+			si->si_code);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
 #ifdef __i386__
 	/*
 	 * 32-bit has some extra padding so that userspace can tell whether
@@ -288,20 +352,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	 * state.  We just assume that it is here.
 	 */
 	fpregs += 0x70;
-#endif
-	pkru_offset = pkru_xstate_offset();
-	pkru_ptr = (void *)(&fpregs[pkru_offset]);
+#endif /* i386 */
+	pkey_reg_offset = pkey_reg_xstate_offset();
+	pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
 
-	dprintf1("siginfo: %p\n", si);
-	dprintf1(" fpregs: %p\n", fpregs);
 	/*
-	 * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+	 * If we got a PKEY fault, we *HAVE* to have at least one bit set in
 	 * here.
 	 */
-	dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+	dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
 	if (DEBUG_LEVEL > 4)
-		dump_mem(pkru_ptr - 128, 256);
-	pkey_assert(*pkru_ptr);
+		dump_mem(pkey_reg_ptr - 128, 256);
+	pkey_assert(*pkey_reg_ptr);
+#endif /* arch */
+
+	dprintf1("siginfo: %p\n", si);
+	dprintf1(" fpregs: %p\n", fpregs);
 
 	if ((si->si_code == SEGV_MAPERR) ||
 	    (si->si_code == SEGV_ACCERR) ||
@@ -310,20 +376,29 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 		exit(4);
 	}
 
-	si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+	si_pkey_ptr = siginfo_get_pkey_ptr(si);
 	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
 	dump_mem((u8 *)si_pkey_ptr - 8, 24);
 	siginfo_pkey = *si_pkey_ptr;
 	pkey_assert(siginfo_pkey < NR_PKEYS);
 	last_si_pkey = siginfo_pkey;
 
-	dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
-	/* need __rdpkru() version so we do not do shadow_pkru checking */
-	dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
-	dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
-	*(u64 *)pkru_ptr = 0x00000000;
-	dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
-	pkru_faults++;
+	/*
+	 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+	 * checking
+	 */
+	dprintf1("signal pkey_reg from  pkey_reg: %016llx\n",
+			__read_pkey_reg());
+	dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+	dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+	*(u64 *)pkey_reg_ptr = 0x00000000;
+	dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
+#elif defined(__powerpc64__) /* arch */
+	/* restore access and let the faulting instruction continue */
+	pkey_access_allow(siginfo_pkey);
+#endif /* arch */
+	pkey_faults++;
 	dprintf1("<<<<==================================================\n");
 	dprint_in_signal = 0;
 }
@@ -391,143 +466,6 @@ pid_t fork_lazy_child(void)
 	return forkret;
 }
 
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS	0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE	0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
-	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
-	u32 pkru = __rdpkru();
-	u32 shifted_pkru;
-	u32 masked_pkru;
-
-	dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
-			__func__, pkey, flags, 0, 0);
-	dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
-	shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
-	dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
-	masked_pkru = shifted_pkru & mask;
-	dprintf2("%s() masked  pkru: %x\n", __func__, masked_pkru);
-	/*
-	 * shift down the relevant bits to the lowest two, then
-	 * mask off all the other high bits.
-	 */
-	return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
-	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
-	u32 old_pkru = __rdpkru();
-	u32 new_pkru;
-
-	/* make sure that 'rights' only contains the bits we expect: */
-	assert(!(rights & ~mask));
-
-	/* copy old pkru */
-	new_pkru = old_pkru;
-	/* mask out bits from pkey in old value: */
-	new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
-	/* OR in new bits for pkey: */
-	new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
-	__wrpkru(new_pkru);
-
-	dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
-			__func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
-	return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
-	unsigned long syscall_flags = 0;
-	int ret;
-	int pkey_rights;
-	u32 orig_pkru = rdpkru();
-
-	dprintf1("START->%s(%d, 0x%x)\n", __func__,
-		pkey, flags);
-	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
-	pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
-	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
-			pkey, pkey, pkey_rights);
-	pkey_assert(pkey_rights >= 0);
-
-	pkey_rights |= flags;
-
-	ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
-	assert(!ret);
-	/*pkru and flags have the same format */
-	shadow_pkru |= flags << (pkey * 2);
-	dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
-	pkey_assert(ret >= 0);
-
-	pkey_rights = hw_pkey_get(pkey, syscall_flags);
-	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
-			pkey, pkey, pkey_rights);
-
-	dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
-	if (flags)
-		pkey_assert(rdpkru() > orig_pkru);
-	dprintf1("END<---%s(%d, 0x%x)\n", __func__,
-		pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
-	unsigned long syscall_flags = 0;
-	int ret;
-	int pkey_rights = hw_pkey_get(pkey, syscall_flags);
-	u32 orig_pkru = rdpkru();
-
-	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
-	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
-			pkey, pkey, pkey_rights);
-	pkey_assert(pkey_rights >= 0);
-
-	pkey_rights |= flags;
-
-	ret = hw_pkey_set(pkey, pkey_rights, 0);
-	/* pkru and flags have the same format */
-	shadow_pkru &= ~(flags << (pkey * 2));
-	pkey_assert(ret >= 0);
-
-	pkey_rights = hw_pkey_get(pkey, syscall_flags);
-	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
-			pkey, pkey, pkey_rights);
-
-	dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
-	if (flags)
-		assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
-	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
-	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
-	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
-	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
 int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 		unsigned long pkey)
 {
@@ -561,33 +499,44 @@ int alloc_pkey(void)
 	int ret;
 	unsigned long init_val = 0x0;
 
-	dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
-			__LINE__, __rdpkru(), shadow_pkru);
+	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+			__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
 	ret = sys_pkey_alloc(0, init_val);
 	/*
-	 * pkey_alloc() sets PKRU, so we need to reflect it in
-	 * shadow_pkru:
+	 * pkey_alloc() sets PKEY register, so we need to reflect it in
+	 * shadow_pkey_reg:
 	 */
-	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			" shadow: 0x%016llx\n",
+			__func__, __LINE__, ret, __read_pkey_reg(),
+			shadow_pkey_reg);
 	if (ret) {
 		/* clear both the bits: */
-		shadow_pkru &= ~(0x3      << (ret * 2));
-		dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
-				__LINE__, ret, __rdpkru(), shadow_pkru);
+		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+						~PKEY_MASK);
+		dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+				" shadow: 0x%016llx\n",
+				__func__,
+				__LINE__, ret, __read_pkey_reg(),
+				shadow_pkey_reg);
 		/*
 		 * move the new state in from init_val
-		 * (remember, we cheated and init_val == pkru format)
+		 * (remember, we cheated and init_val == pkey_reg format)
 		 */
-		shadow_pkru |=  (init_val << (ret * 2));
+		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+						init_val);
 	}
-	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
-			__LINE__, ret, __rdpkru(), shadow_pkru);
-	dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			" shadow: 0x%016llx\n",
+			__func__, __LINE__, ret, __read_pkey_reg(),
+			shadow_pkey_reg);
+	dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
 	/* for shadow checking: */
-	rdpkru();
-	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+	read_pkey_reg();
+	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+		 " shadow: 0x%016llx\n",
+		__func__, __LINE__, ret, __read_pkey_reg(),
+		shadow_pkey_reg);
 	return ret;
 }
 
@@ -612,10 +561,10 @@ int alloc_random_pkey(void)
 	int nr_alloced = 0;
 	int random_index;
 	memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+	srand((unsigned int)time(NULL));
 
 	/* allocate every possible key and make a note of which ones we got */
 	max_nr_pkey_allocs = NR_PKEYS;
-	max_nr_pkey_allocs = 1;
 	for (i = 0; i < max_nr_pkey_allocs; i++) {
 		int new_pkey = alloc_pkey();
 		if (new_pkey < 0)
@@ -638,8 +587,9 @@ int alloc_random_pkey(void)
 		free_ret = sys_pkey_free(alloced_pkeys[i]);
 		pkey_assert(!free_ret);
 	}
-	dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+	dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			 " shadow: 0x%016llx\n", __func__,
+			__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
 	return ret;
 }
 
@@ -657,11 +607,15 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 		if (nr_iterations-- < 0)
 			break;
 
-		dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+		dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			" shadow: 0x%016llx\n",
+			__func__, __LINE__, ret, __read_pkey_reg(),
+			shadow_pkey_reg);
 		sys_pkey_free(rpkey);
-		dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+		dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			" shadow: 0x%016llx\n",
+			__func__, __LINE__, ret, __read_pkey_reg(),
+			shadow_pkey_reg);
 	}
 	pkey_assert(pkey < NR_PKEYS);
 
@@ -669,8 +623,9 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 	dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
 			ptr, size, orig_prot, pkey, ret);
 	pkey_assert(!ret);
-	dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
-			__LINE__, ret, __rdpkru(), shadow_pkru);
+	dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+			" shadow: 0x%016llx\n", __func__,
+			__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
 	return ret;
 }
 
@@ -752,7 +707,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
 	void *ptr;
 	int ret;
 
-	rdpkru();
+	read_pkey_reg();
 	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
 			size, prot, pkey);
 	pkey_assert(pkey < NR_PKEYS);
@@ -761,7 +716,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
 	ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
 	pkey_assert(!ret);
 	record_pkey_malloc(ptr, size, prot);
-	rdpkru();
+	read_pkey_reg();
 
 	dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
 	return ptr;
@@ -798,12 +753,15 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
 }
 
 int hugetlb_setup_ok;
+#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
 #define GET_NR_HUGE_PAGES 10
 void setup_hugetlbfs(void)
 {
 	int err;
 	int fd;
-	char buf[] = "123";
+	char buf[256];
+	long hpagesz_kb;
+	long hpagesz_mb;
 
 	if (geteuid() != 0) {
 		fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
@@ -814,11 +772,16 @@ void setup_hugetlbfs(void)
 
 	/*
 	 * Now go make sure that we got the pages and that they
-	 * are 2M pages.  Someone might have made 1G the default.
+	 * are PMD-level pages. Someone might have made PUD-level
+	 * pages the default.
 	 */
-	fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+	hpagesz_kb = HPAGE_SIZE / 1024;
+	hpagesz_mb = hpagesz_kb / 1024;
+	sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
+	fd = open(buf, O_RDONLY);
 	if (fd < 0) {
-		perror("opening sysfs 2M hugetlb config");
+		fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
+			hpagesz_mb, strerror(errno));
 		return;
 	}
 
@@ -826,13 +789,14 @@ void setup_hugetlbfs(void)
 	err = read(fd, buf, sizeof(buf)-1);
 	close(fd);
 	if (err <= 0) {
-		perror("reading sysfs 2M hugetlb config");
+		fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n",
+			hpagesz_mb, strerror(errno));
 		return;
 	}
 
 	if (atoi(buf) != GET_NR_HUGE_PAGES) {
-		fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
-			buf, GET_NR_HUGE_PAGES);
+		fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n",
+			hpagesz_mb, buf, GET_NR_HUGE_PAGES);
 		return;
 	}
 
@@ -886,6 +850,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
 void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
 
 	malloc_pkey_with_mprotect,
+	malloc_pkey_with_mprotect_subpage,
 	malloc_pkey_anon_huge,
 	malloc_pkey_hugetlb
 /* can not do direct with the pkey_mprotect() API:
@@ -924,14 +889,14 @@ void *malloc_pkey(long size, int prot, u16 pkey)
 	return ret;
 }
 
-int last_pkru_faults;
+int last_pkey_faults;
 #define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
+void expected_pkey_fault(int pkey)
 {
-	dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
-			__func__, last_pkru_faults, pkru_faults);
+	dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
+			__func__, last_pkey_faults, pkey_faults);
 	dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
-	pkey_assert(last_pkru_faults + 1 == pkru_faults);
+	pkey_assert(last_pkey_faults + 1 == pkey_faults);
 
        /*
 	* For exec-only memory, we do not know the pkey in
@@ -940,24 +905,28 @@ void expected_pk_fault(int pkey)
 	if (pkey != UNKNOWN_PKEY)
 		pkey_assert(last_si_pkey == pkey);
 
+#if defined(__i386__) || defined(__x86_64__) /* arch */
 	/*
-	 * The signal handler shold have cleared out PKRU to let the
+	 * The signal handler shold have cleared out PKEY register to let the
 	 * test program continue.  We now have to restore it.
 	 */
-	if (__rdpkru() != 0)
+	if (__read_pkey_reg() != 0)
+#else /* arch */
+	if (__read_pkey_reg() != shadow_pkey_reg)
+#endif /* arch */
 		pkey_assert(0);
 
-	__wrpkru(shadow_pkru);
-	dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
-			__func__, shadow_pkru);
-	last_pkru_faults = pkru_faults;
+	__write_pkey_reg(shadow_pkey_reg);
+	dprintf1("%s() set pkey_reg=%016llx to restore state after signal "
+		       "nuked it\n", __func__, shadow_pkey_reg);
+	last_pkey_faults = pkey_faults;
 	last_si_pkey = -1;
 }
 
-#define do_not_expect_pk_fault(msg)	do {			\
-	if (last_pkru_faults != pkru_faults)			\
-		dprintf0("unexpected PK fault: %s\n", msg);	\
-	pkey_assert(last_pkru_faults == pkru_faults);		\
+#define do_not_expect_pkey_fault(msg)	do {			\
+	if (last_pkey_faults != pkey_faults)			\
+		dprintf0("unexpected PKey fault: %s\n", msg);	\
+	pkey_assert(last_pkey_faults == pkey_faults);		\
 } while (0)
 
 int test_fds[10] = { -1 };
@@ -1000,6 +969,58 @@ __attribute__((noinline)) int read_ptr(int *ptr)
 	return *ptr;
 }
 
+void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+{
+	int i, err;
+	int max_nr_pkey_allocs;
+	int alloced_pkeys[NR_PKEYS];
+	int nr_alloced = 0;
+	long size;
+
+	pkey_assert(pkey_last_malloc_record);
+	size = pkey_last_malloc_record->size;
+	/*
+	 * This is a bit of a hack.  But mprotect() requires
+	 * huge-page-aligned sizes when operating on hugetlbfs.
+	 * So, make sure that we use something that's a multiple
+	 * of a huge page when we can.
+	 */
+	if (size >= HPAGE_SIZE)
+		size = HPAGE_SIZE;
+
+	/* allocate every possible key and make sure key-0 never got allocated */
+	max_nr_pkey_allocs = NR_PKEYS;
+	for (i = 0; i < max_nr_pkey_allocs; i++) {
+		int new_pkey = alloc_pkey();
+		pkey_assert(new_pkey != 0);
+
+		if (new_pkey < 0)
+			break;
+		alloced_pkeys[nr_alloced++] = new_pkey;
+	}
+	/* free all the allocated keys */
+	for (i = 0; i < nr_alloced; i++) {
+		int free_ret;
+
+		if (!alloced_pkeys[i])
+			continue;
+		free_ret = sys_pkey_free(alloced_pkeys[i]);
+		pkey_assert(!free_ret);
+	}
+
+	/* attach key-0 in various modes */
+	err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
+	pkey_assert(!err);
+	err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
+	pkey_assert(!err);
+	err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
+	pkey_assert(!err);
+	err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
+	pkey_assert(!err);
+	err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
+	pkey_assert(!err);
+}
+
 void test_read_of_write_disabled_region(int *ptr, u16 pkey)
 {
 	int ptr_contents;
@@ -1015,26 +1036,67 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
 	int ptr_contents;
 
 	dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
-	rdpkru();
+	read_pkey_reg();
+	pkey_access_deny(pkey);
+	ptr_contents = read_ptr(ptr);
+	dprintf1("*ptr: %d\n", ptr_contents);
+	expected_pkey_fault(pkey);
+}
+
+void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+		u16 pkey)
+{
+	int ptr_contents;
+
+	dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
+				pkey, ptr);
+	ptr_contents = read_ptr(ptr);
+	dprintf1("reading ptr before disabling the read : %d\n",
+			ptr_contents);
+	read_pkey_reg();
 	pkey_access_deny(pkey);
 	ptr_contents = read_ptr(ptr);
 	dprintf1("*ptr: %d\n", ptr_contents);
-	expected_pk_fault(pkey);
+	expected_pkey_fault(pkey);
 }
+
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+		u16 pkey)
+{
+	*ptr = __LINE__;
+	dprintf1("disabling write access; after accessing the page, "
+		"to PKEY[%02d], doing write\n", pkey);
+	pkey_write_deny(pkey);
+	*ptr = __LINE__;
+	expected_pkey_fault(pkey);
+}
+
 void test_write_of_write_disabled_region(int *ptr, u16 pkey)
 {
 	dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
 	pkey_write_deny(pkey);
 	*ptr = __LINE__;
-	expected_pk_fault(pkey);
+	expected_pkey_fault(pkey);
 }
 void test_write_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
 	pkey_access_deny(pkey);
 	*ptr = __LINE__;
-	expected_pk_fault(pkey);
+	expected_pkey_fault(pkey);
 }
+
+void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+			u16 pkey)
+{
+	*ptr = __LINE__;
+	dprintf1("disabling access; after accessing the page, "
+		" to PKEY[%02d], doing write\n", pkey);
+	pkey_access_deny(pkey);
+	*ptr = __LINE__;
+	expected_pkey_fault(pkey);
+}
+
 void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	int ret;
@@ -1160,9 +1222,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 		int new_pkey;
 		dprintf1("%s() alloc loop: %d\n", __func__, i);
 		new_pkey = alloc_pkey();
-		dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
-				__LINE__, err, __rdpkru(), shadow_pkru);
-		rdpkru(); /* for shadow checking */
+		dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
+				" shadow: 0x%016llx\n",
+				__func__, __LINE__, err, __read_pkey_reg(),
+				shadow_pkey_reg);
+		read_pkey_reg(); /* for shadow checking */
 		dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
 		if ((new_pkey == -1) && (errno == ENOSPC)) {
 			dprintf2("%s() failed to allocate pkey after %d tries\n",
@@ -1188,6 +1252,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 	dprintf3("%s()::%d\n", __func__, __LINE__);
 
 	/*
+	 * On x86:
 	 * There are 16 pkeys supported in hardware.  Three are
 	 * allocated by the time we get here:
 	 *   1. The default key (0)
@@ -1195,13 +1260,21 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 	 *   3. One allocated by the test code and passed in via
 	 *      'pkey' to this function.
 	 * Ensure that we can allocate at least another 13 (16-3).
+	 *
+	 * On powerpc:
+	 * There are either 5, 28, 29 or 32 pkeys supported in
+	 * hardware depending on the page size (4K or 64K) and
+	 * platform (powernv or powervm). Four are allocated by
+	 * the time we get here. These include pkey-0, pkey-1,
+	 * exec-only pkey and the one allocated by the test code.
+	 * Ensure that we can allocate the remaining.
 	 */
-	pkey_assert(i >= NR_PKEYS-3);
+	pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));
 
 	for (i = 0; i < nr_allocated_pkeys; i++) {
 		err = sys_pkey_free(allocated_pkeys[i]);
 		pkey_assert(!err);
-		rdpkru(); /* for shadow checking */
+		read_pkey_reg(); /* for shadow checking */
 	}
 }
 
@@ -1287,7 +1360,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
 	pkey_assert(ret != -1);
 	/* Now access from the current task, and expect an exception: */
 	peek_result = read_ptr(ptr);
-	expected_pk_fault(pkey);
+	expected_pkey_fault(pkey);
 
 	/*
 	 * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
@@ -1297,7 +1370,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
 	pkey_assert(ret != -1);
 	/* Now access from the current task, and expect NO exception: */
 	peek_result = read_ptr(plain_ptr);
-	do_not_expect_pk_fault("read plain pointer after ptrace");
+	do_not_expect_pkey_fault("read plain pointer after ptrace");
 
 	ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
 	pkey_assert(ret != -1);
@@ -1347,17 +1420,15 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
 	pkey_assert(!ret);
 	pkey_access_deny(pkey);
 
-	dprintf2("pkru: %x\n", rdpkru());
+	dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
 
 	/*
 	 * Make sure this is an *instruction* fault
 	 */
 	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
 	lots_o_noops_around_write(&scratch);
-	do_not_expect_pk_fault("executing on PROT_EXEC memory");
-	ptr_contents = read_ptr(p1);
-	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-	expected_pk_fault(pkey);
+	do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+	expect_fault_on_read_execonly_key(p1, pkey);
 }
 
 void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1378,15 +1449,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 	ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
 	pkey_assert(!ret);
 
-	dprintf2("pkru: %x\n", rdpkru());
+	dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
 
 	/* Make sure this is an *instruction* fault */
 	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
 	lots_o_noops_around_write(&scratch);
-	do_not_expect_pk_fault("executing on PROT_EXEC memory");
-	ptr_contents = read_ptr(p1);
-	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-	expected_pk_fault(UNKNOWN_PKEY);
+	do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+	expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
 
 	/*
 	 * Put the memory back to non-PROT_EXEC.  Should clear the
@@ -1400,7 +1469,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 	ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
 	pkey_assert(!ret);
 	ptr_contents = read_ptr(p1);
-	do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+	do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
 }
 
 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
@@ -1408,7 +1477,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 	int size = PAGE_SIZE;
 	int sret;
 
-	if (cpu_has_pku()) {
+	if (cpu_has_pkeys()) {
 		dprintf1("SKIP: %s: no CPU support\n", __func__);
 		return;
 	}
@@ -1420,8 +1489,11 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 void (*pkey_tests[])(int *ptr, u16 pkey) = {
 	test_read_of_write_disabled_region,
 	test_read_of_access_disabled_region,
+	test_read_of_access_disabled_region_with_page_already_mapped,
 	test_write_of_write_disabled_region,
+	test_write_of_write_disabled_region_with_page_already_mapped,
 	test_write_of_access_disabled_region,
+	test_write_of_access_disabled_region_with_page_already_mapped,
 	test_kernel_write_of_access_disabled_region,
 	test_kernel_write_of_write_disabled_region,
 	test_kernel_gup_of_access_disabled_region,
@@ -1433,6 +1505,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
 	test_pkey_syscalls_on_non_allocated_pkey,
 	test_pkey_syscalls_bad_args,
 	test_pkey_alloc_exhaust,
+	test_pkey_alloc_free_attach_pkey0,
 };
 
 void run_tests_once(void)
@@ -1442,7 +1515,7 @@ void run_tests_once(void)
 
 	for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
 		int pkey;
-		int orig_pkru_faults = pkru_faults;
+		int orig_pkey_faults = pkey_faults;
 
 		dprintf1("======================\n");
 		dprintf1("test %d preparing...\n", test_nr);
@@ -1457,8 +1530,8 @@ void run_tests_once(void)
 		free_pkey_malloc(ptr);
 		sys_pkey_free(pkey);
 
-		dprintf1("pkru_faults: %d\n", pkru_faults);
-		dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+		dprintf1("pkey_faults: %d\n", pkey_faults);
+		dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
 
 		tracing_off();
 		close_test_fds();
@@ -1471,18 +1544,19 @@ void run_tests_once(void)
 
 void pkey_setup_shadow(void)
 {
-	shadow_pkru = __rdpkru();
+	shadow_pkey_reg = __read_pkey_reg();
 }
 
 int main(void)
 {
 	int nr_iterations = 22;
+	int pkeys_supported = is_pkeys_supported();
 
 	setup_handlers();
 
-	printf("has pku: %d\n", cpu_has_pku());
+	printf("has pkeys: %d\n", pkeys_supported);
 
-	if (!cpu_has_pku()) {
+	if (!pkeys_supported) {
 		int size = PAGE_SIZE;
 		int *ptr;
 
@@ -1495,7 +1569,7 @@ int main(void)
 	}
 
 	pkey_setup_shadow();
-	printf("startup pkru: %x\n", rdpkru());
+	printf("startup pkey_reg: %016llx\n", read_pkey_reg());
 	setup_hugetlbfs();
 
 	while (nr_iterations-- > 0)
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 76ca5e7a3951..a3f4f30f0a2e 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -307,4 +307,20 @@ else
 	echo "[FAIL]"
 	exitcode=1
 fi
+
+echo "running HMM smoke test"
+echo "------------------------------------"
+./test_hmm.sh smoke
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+	echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+	echo "[SKIP]"
+	exitcode=$ksft_skip
+else
+	echo "[FAIL]"
+	exitcode=1
+fi
+
 exit $exitcode
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
new file mode 100755
index 000000000000..0647b525a625
--- /dev/null
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the kernel test driver to analyse vmalloc
+# allocator. Therefore it is just a kernel module loader. You can specify
+# and pass different parameters in order to:
+#     a) analyse performance of vmalloc allocations;
+#     b) stressing and stability check of vmalloc subsystem.
+
+TEST_NAME="test_hmm"
+DRIVER="test_hmm"
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_requirements()
+{
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo "$0: Must be run as root"
+		exit $ksft_skip
+	fi
+
+	if ! which modprobe > /dev/null 2>&1; then
+		echo "$0: You need modprobe installed"
+		exit $ksft_skip
+	fi
+
+	if ! modinfo $DRIVER > /dev/null 2>&1; then
+		echo "$0: You must have the following enabled in your kernel:"
+		echo "CONFIG_TEST_HMM=m"
+		exit $ksft_skip
+	fi
+}
+
+load_driver()
+{
+	modprobe $DRIVER > /dev/null 2>&1
+	if [ $? == 0 ]; then
+		major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
+		mknod /dev/hmm_dmirror0 c $major 0
+		mknod /dev/hmm_dmirror1 c $major 1
+	fi
+}
+
+unload_driver()
+{
+	modprobe -r $DRIVER > /dev/null 2>&1
+	rm -f /dev/hmm_dmirror?
+}
+
+run_smoke()
+{
+	echo "Running smoke test. Note, this test provides basic coverage."
+
+	load_driver
+	$(dirname "${BASH_SOURCE[0]}")/hmm-tests
+	unload_driver
+}
+
+usage()
+{
+	echo -n "Usage: $0"
+	echo
+	echo "Example usage:"
+	echo
+	echo "# Shows help message"
+	echo "./${TEST_NAME}.sh"
+	echo
+	echo "# Smoke testing"
+	echo "./${TEST_NAME}.sh smoke"
+	echo
+	exit 0
+}
+
+function run_test()
+{
+	if [ $# -eq 0 ]; then
+		usage
+	else
+		if [ "$1" = "smoke" ]; then
+			run_smoke
+		else
+			usage
+		fi
+	fi
+}
+
+check_test_requirements
+run_test $@
+
+exit 0
diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/vm/write_to_hugetlbfs.c
index 110bc4e4015d..6a2caba19ee1 100644
--- a/tools/testing/selftests/vm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/vm/write_to_hugetlbfs.c
@@ -74,8 +74,6 @@ int main(int argc, char **argv)
 	int write = 0;
 	int reserve = 1;
 
-	unsigned long i;
-
 	if (signal(SIGINT, sig_handler) == SIG_ERR)
 		err(1, "\ncan't catch SIGINT\n");
 
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 90598a425c18..4bdd6c1a19d3 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -44,7 +44,7 @@ endef
 $(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
 $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
 $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
 $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
 $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
 $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 9803dbb54181..b50c2085c1ac 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -57,7 +57,6 @@ CONFIG_RCU_EQS_DEBUG=y
 CONFIG_USER_STACKTRACE_SUPPORT=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DOUBLEFAULT=y
 CONFIG_X86_DEBUG_FPU=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_DEBUG_PAGEALLOC=y
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 022a1f3b64ef..1aaef5bf119a 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -12,5 +12,4 @@ ldt_gdt
 iopl
 mpx-mini-test
 ioperm
-protection_keys
 test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d49bfec1e9a..5f16821c7f63 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
 			check_initial_reg_state sigreturn iopl ioperm \
-			protection_keys test_vdso test_vsyscall mov_ss_trap \
+			test_vdso test_vsyscall mov_ss_trap \
 			syscall_arg_fault
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e5436bdd9..000000000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
-	va_list ap;
-
-	if (!dprint_in_signal) {
-		va_start(ap, format);
-		vprintf(format, ap);
-		va_end(ap);
-	} else {
-		int ret;
-		/*
-		 * No printf() functions are signal-safe.
-		 * They deadlock easily. Write the format
-		 * string to get some output, even if
-		 * incomplete.
-		 */
-		ret = write(1, format, strlen(format));
-		if (ret < 0)
-			exit(1);
-	}
-}
-#define dprintf_level(level, args...) do {	\
-	if (level <= DEBUG_LEVEL)		\
-		sigsafe_printf(args);		\
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
-	unsigned int eax, edx;
-	unsigned int ecx = 0;
-	unsigned int pkru;
-
-	asm volatile(".byte 0x0f,0x01,0xee\n\t"
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (ecx));
-	pkru = eax;
-	return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
-	unsigned int pkru = __rdpkru();
-
-	dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
-			line, pkru, shadow_pkru);
-	assert(pkru == shadow_pkru);
-
-	return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
-	unsigned int eax = pkru;
-	unsigned int ecx = 0;
-	unsigned int edx = 0;
-
-	dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
-	asm volatile(".byte 0x0f,0x01,0xef\n\t"
-		     : : "a" (eax), "c" (ecx), "d" (edx));
-	assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
-	dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
-	/* will do the shadow check for us: */
-	rdpkru();
-	__wrpkru(pkru);
-	shadow_pkru = pkru;
-	dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
-	unsigned int pkru = rdpkru();
-	int bit = pkey * 2;
-
-	if (do_allow)
-		pkru &= (1<<bit);
-	else
-		pkru |= (1<<bit);
-
-	dprintf4("pkru now: %08x\n", rdpkru());
-	wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
-	long pkru = rdpkru();
-	int bit = pkey * 2 + 1;
-
-	if (do_allow_write)
-		pkru &= (1<<bit);
-	else
-		pkru |= (1<<bit);
-
-	wrpkru(pkru);
-	dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0     0x10            /* protection key value (bit 0) */
-#define PROT_PKEY1     0x20            /* protection key value (bit 1) */
-#define PROT_PKEY2     0x40            /* protection key value (bit 2) */
-#define PROT_PKEY3     0x80            /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB	(1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
-		unsigned int *ecx, unsigned int *edx)
-{
-	/* ecx is often an input as well as an output. */
-	asm volatile(
-		"cpuid;"
-		: "=a" (*eax),
-		  "=b" (*ebx),
-		  "=c" (*ecx),
-		  "=d" (*edx)
-		: "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU        (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE      (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
-	unsigned int eax;
-	unsigned int ebx;
-	unsigned int ecx;
-	unsigned int edx;
-
-	eax = 0x7;
-	ecx = 0x0;
-	__cpuid(&eax, &ebx, &ecx, &edx);
-
-	if (!(ecx & X86_FEATURE_PKU)) {
-		dprintf2("cpu does not have PKU\n");
-		return 0;
-	}
-	if (!(ecx & X86_FEATURE_OSPKE)) {
-		dprintf2("cpu does not have OSPKE\n");
-		return 0;
-	}
-	return 1;
-}
-
-#define XSTATE_PKRU_BIT	(9)
-#define XSTATE_PKRU	0x200
-
-int pkru_xstate_offset(void)
-{
-	unsigned int eax;
-	unsigned int ebx;
-	unsigned int ecx;
-	unsigned int edx;
-	int xstate_offset;
-	int xstate_size;
-	unsigned long XSTATE_CPUID = 0xd;
-	int leaf;
-
-	/* assume that XSTATE_PKRU is set in XCR0 */
-	leaf = XSTATE_PKRU_BIT;
-	{
-		eax = XSTATE_CPUID;
-		ecx = leaf;
-		__cpuid(&eax, &ebx, &ecx, &edx);
-
-		if (leaf == XSTATE_PKRU_BIT) {
-			xstate_offset = ebx;
-			xstate_size = eax;
-		}
-	}
-
-	if (xstate_size == 0) {
-		printf("could not find size/offset of PKRU in xsave state\n");
-		return 0;
-	}
-
-	return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */